rustledger_loader/process.rs
1//! Processing pipeline: sort → synth-plugins → Early → book → regular-plugins → Late → finalize.
2//!
3//! This module orchestrates the full processing pipeline for a beancount ledger,
4//! equivalent to Python's `loader.load_file()` function.
5
6use crate::{LoadError, LoadResult, Options, Plugin, SourceMap};
7use rustledger_core::{BookingMethod, Directive, DisplayContext};
8use rustledger_parser::Spanned;
9use std::path::Path;
10use thiserror::Error;
11
/// Options for loading and processing a ledger.
///
/// These are the API-level knobs consumed by `process` and `run_plugins`.
/// `Default` gives the standard configuration; `LoadOptions::raw` switches
/// plugins and validation off.
#[derive(Debug, Clone)]
pub struct LoadOptions {
    /// Booking method for lot matching (default: Strict).
    ///
    /// When the ledger file explicitly sets `option "booking_method"` and
    /// that value parses, `process` uses the file's value instead of this one.
    pub booking_method: BookingMethod,
    /// Run plugins declared in the file (default: true).
    ///
    /// Document discovery is also gated on this flag.
    pub run_plugins: bool,
    /// Run `auto_accounts` plugin (default: false).
    ///
    /// Honoured in the pre-booking synth pass and in `PluginPass::All`.
    pub auto_accounts: bool,
    /// Additional native plugins to run (by name).
    pub extra_plugins: Vec<String>,
    /// Plugin configurations for extra plugins.
    ///
    /// Paired with `extra_plugins` by index; a missing or `None` entry means
    /// the plugin runs without a configuration string.
    pub extra_plugin_configs: Vec<Option<String>>,
    /// Run validation after processing (default: true).
    pub validate: bool,
    /// Enable path security (prevent include traversal).
    ///
    /// In the plugin runner this also rejects file-based plugins whose
    /// canonical path lies outside the ledger directory.
    pub path_security: bool,
}
30
31impl Default for LoadOptions {
32 fn default() -> Self {
33 Self {
34 booking_method: BookingMethod::Strict,
35 run_plugins: true,
36 auto_accounts: false,
37 extra_plugins: Vec::new(),
38 extra_plugin_configs: Vec::new(),
39 validate: true,
40 path_security: false,
41 }
42 }
43}
44
45impl LoadOptions {
46 /// Create options for raw loading (no booking, no plugins, no validation).
47 #[must_use]
48 pub const fn raw() -> Self {
49 Self {
50 booking_method: BookingMethod::Strict,
51 run_plugins: false,
52 auto_accounts: false,
53 extra_plugins: Vec::new(),
54 extra_plugin_configs: Vec::new(),
55 validate: false,
56 path_security: false,
57 }
58 }
59}
60
/// Errors that can occur during ledger processing.
///
/// These are the hard failures returned through `Result`. Recoverable
/// diagnostics are collected as `LedgerError` values on `Ledger::errors`
/// instead. Every variant except `Load` is compiled only when the matching
/// cargo feature is enabled.
#[derive(Debug, Error)]
pub enum ProcessError {
    /// Loading failed. Converts from `LoadError` automatically (`#[from]`),
    /// so `?` works on load results.
    #[error("loading failed: {0}")]
    Load(#[from] LoadError),

    /// Booking/interpolation error.
    ///
    /// Only the message is rendered by `Display`; `date` and `narration`
    /// are carried as structured fields.
    #[cfg(feature = "booking")]
    #[error("booking error: {message}")]
    Booking {
        /// Error message.
        message: String,
        /// Date of the transaction.
        date: rustledger_core::NaiveDate,
        /// Narration of the transaction.
        narration: String,
    },

    /// Plugin execution error.
    #[cfg(feature = "plugins")]
    #[error("plugin error: {0}")]
    Plugin(String),

    /// Validation error.
    #[cfg(feature = "validation")]
    #[error("validation error: {0}")]
    Validation(String),

    /// Plugin output conversion error.
    #[cfg(feature = "plugins")]
    #[error("failed to convert plugin output: {0}")]
    PluginConversion(String),
}
95
/// A fully processed ledger.
///
/// This is the result of loading and processing a beancount file,
/// equivalent to the tuple returned by Python's `loader.load_file()`.
#[derive(Debug)]
pub struct Ledger {
    /// Processed directives (sorted, booked, plugins applied).
    ///
    /// Transactions that failed booking are included too, in their
    /// pre-booking shape, merged back into canonical display order.
    pub directives: Vec<Spanned<Directive>>,
    /// Options parsed from the file.
    pub options: Options,
    /// Plugins declared in the file.
    pub plugins: Vec<Plugin>,
    /// Source map for error reporting.
    pub source_map: SourceMap,
    /// Errors encountered during processing.
    ///
    /// Load errors are converted into this list as well, tagged with the
    /// `"parse"` phase.
    pub errors: Vec<LedgerError>,
    /// Display context for formatting numbers.
    pub display_context: DisplayContext,
}
115
/// Unified error type for ledger processing.
///
/// This encompasses all error types that can occur during loading,
/// booking, plugin execution, and validation.
///
/// Build values with `LedgerError::error` / `LedgerError::warning` and
/// refine them with the `with_*` builder methods.
#[derive(Debug)]
#[non_exhaustive]
pub struct LedgerError {
    /// Error severity.
    pub severity: ErrorSeverity,
    /// Error code (e.g., "E0001", "W8002").
    pub code: String,
    /// Human-readable error message.
    pub message: String,
    /// Source location, if available.
    pub location: Option<ErrorLocation>,
    /// Byte span (inclusive start, exclusive end) in the source file,
    /// used by rich renderers (e.g. miette) to draw a snippet around
    /// the offending directive. Consumers that only need `file:line:col`
    /// should use `location`; those that want to show the surrounding
    /// source text want this.
    pub source_span: Option<(usize, usize)>,
    /// Source file ID — index into the ledger's [`SourceMap`]. Used
    /// alongside `source_span` for snippet rendering.
    pub file_id: Option<u16>,
    /// Processing phase that produced this error: "parse", "validate", or "plugin".
    ///
    /// The constructors default this to "validate"; other phases are set
    /// explicitly via `with_phase`.
    pub phase: String,
}
143
/// Error severity level.
///
/// Stored on every [`LedgerError`]; `Copy` and comparable so callers can
/// filter diagnostics by level cheaply.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorSeverity {
    /// Error - indicates a problem that should be fixed.
    Error,
    /// Warning - indicates a potential issue.
    Warning,
}
152
/// Source location for an error.
///
/// A human-oriented `file:line:column` triple; byte-precise positions live
/// in `LedgerError::source_span` instead.
#[derive(Debug, Clone)]
pub struct ErrorLocation {
    /// File path.
    pub file: std::path::PathBuf,
    /// Line number (1-indexed).
    pub line: usize,
    /// Column number (1-indexed).
    pub column: usize,
}
163
164impl LedgerError {
165 /// Create a new error with the given phase.
166 pub fn error(code: impl Into<String>, message: impl Into<String>) -> Self {
167 Self {
168 severity: ErrorSeverity::Error,
169 code: code.into(),
170 message: message.into(),
171 location: None,
172 source_span: None,
173 file_id: None,
174 phase: "validate".to_string(),
175 }
176 }
177
178 /// Create a new warning.
179 pub fn warning(code: impl Into<String>, message: impl Into<String>) -> Self {
180 Self {
181 severity: ErrorSeverity::Warning,
182 code: code.into(),
183 message: message.into(),
184 location: None,
185 source_span: None,
186 file_id: None,
187 phase: "validate".to_string(),
188 }
189 }
190
191 /// Attach a source span and file ID so rich renderers can draw a snippet.
192 #[must_use]
193 pub const fn with_source_span(mut self, span: (usize, usize), file_id: u16) -> Self {
194 self.source_span = Some(span);
195 self.file_id = Some(file_id);
196 self
197 }
198
199 /// Set the processing phase for this error.
200 #[must_use]
201 pub fn with_phase(mut self, phase: impl Into<String>) -> Self {
202 self.phase = phase.into();
203 self
204 }
205
206 /// Add a location to this error.
207 #[must_use]
208 pub fn with_location(mut self, location: ErrorLocation) -> Self {
209 self.location = Some(location);
210 self
211 }
212}
213
214/// Process a raw load result into a fully processed ledger.
215///
216/// Pipeline (see numbered comments below for the rationale of each step):
217///
218/// ```text
219/// 1. sort (canonical display order)
220/// 2. synth plugins (auto_accounts, document_discovery)
221/// 3. Early validation (account presence, structural, lifecycle)
222/// 4. booking (cost spec resolution, interpolation)
223/// 5. partition (set aside failed-booking txns)
224/// 6. regular plugins (file plugins + extras, on booked only)
225/// 7. Late validation (balance, currency, inventory, on booked only)
226/// 8. finalize (unused-pad warnings)
227/// 9. re-merge (booked + failed → final Ledger.directives)
228/// ```
229pub fn process(raw: LoadResult, options: &LoadOptions) -> Result<Ledger, ProcessError> {
230 let mut directives = raw.directives;
231 let mut errors: Vec<LedgerError> = Vec::new();
232
233 // Convert load errors to ledger errors (parse phase)
234 for load_err in raw.errors {
235 errors.push(LedgerError::error("LOAD", load_err.to_string()).with_phase("parse"));
236 }
237
238 // 1. Sort once into canonical display order: `(date, priority, file_id,
239 // span.start)`. This is what BQL / JSON / format output expect and
240 // what Python beancount produces via `(date, type_priority, lineno)`.
241 // `span.start` is a byte offset that orders within a file the same
242 // way line numbers would; `file_id` preserves include order across
243 // files (issue #1049 — same rows, different tie-break would diverge
244 // BQL output on same-date augmentation+reduction fixtures).
245 //
246 // Booking needs a different iteration order — augmentations BEFORE
247 // reductions on the same `(date, priority)` so lots exist when
248 // matched — but it doesn't need the underlying vec reordered.
249 // `run_booking` walks the vec via a transient `Vec<usize>` index
250 // that adds `has_cost_reduction` as an extra tiebreaker; this
251 // avoids a second full sort of `Vec<Spanned<Directive>>` (large
252 // structs) after booking just to put display order back.
253 directives.sort_by_key(|d| (d.value.date(), d.value.priority(), d.file_id, d.span.start));
254
255 // 2. Synth-only plugins — run BEFORE early validation so the
256 // synthesizers (`auto_accounts` and `document_discovery`) inject
257 // Opens / Documents that Early checks depend on (E1001 account
258 // presence, E5001 missing-document file). Only this narrow synth
259 // subset runs here; everything else waits until after booking
260 // (step 5) so cost-spec-reading plugins see filled-in
261 // `cost.number_per` values. See `PluginPass` rustdoc for the
262 // detailed split rationale.
263 #[cfg(feature = "plugins")]
264 if options.run_plugins || options.auto_accounts {
265 run_plugins(
266 &mut directives,
267 &raw.plugins,
268 &raw.options,
269 options,
270 &raw.source_map,
271 &mut errors,
272 PluginPass::PreBookingSynth,
273 )?;
274 }
275
276 // 3. Validation (early phase) — runs on pre-booking directives,
277 // AFTER plugins so account-presence checks (E1001) see any Opens
278 // that plugins like `auto_accounts` injected.
279 //
280 // This is what lets booking match Python's "prune zero-interp
281 // postings" behavior in step 4 without losing E1001 on the
282 // elided-zero-to-unopened-account case (rustledger#877).
283 //
284 // The `ValidationSession` carries state (open accounts,
285 // commodities, pending pads, accumulated tolerances) into the late
286 // phase at step 5 so balance assertions and inventory updates see
287 // everything the early phase recorded.
288 #[cfg(feature = "validation")]
289 let mut validation_session = if options.validate {
290 Some(rustledger_validate::ValidationSession::new(
291 build_validation_options(&raw.options, &raw.source_map),
292 ))
293 } else {
294 None
295 };
296
297 // Compute `today` once for both phases — avoids a midnight-crossing
298 // race where Early and Late could disagree on what day it is, and
299 // gives `FutureDate` warnings a single coherent reference point.
300 #[cfg(feature = "validation")]
301 let today = jiff::Zoned::now().date();
302
303 #[cfg(feature = "validation")]
304 if let Some(session) = validation_session.as_mut() {
305 let phase_errors =
306 session.run_phase_spanned(&directives, rustledger_validate::Phase::Early, today);
307 ledger_errors_extend(&mut errors, phase_errors, &raw.source_map);
308 }
309
310 // 4. Booking/interpolation
311 //
312 // The booking method comes from two sources: the API-level
313 // `LoadOptions.booking_method` and the file-level `option
314 // "booking_method"`. The file-level option takes precedence only
315 // when the file explicitly set it AND the caller hasn't overridden
316 // the API-level default. This matches Python beancount, where
317 // `option "booking_method" "FIFO"` sets the default for all accounts
318 // without an explicit method on their `open` directive.
319 //
320 // We check `set_options` (not `booking_method.is_empty()`) because
321 // `Options::new()` defaults `booking_method` to "STRICT", so the
322 // string is never empty.
323 //
324 // Booking drops zero-value interpolated postings as part of
325 // `interpolate()` — see the comment in
326 // `rustledger-booking/src/interpolate.rs`. The early validation
327 // pass above already caught E1001 on any unopened-account
328 // references, so it's safe to prune now (the now-removed
329 // `INTERPOLATED_MARKER` workaround in #1114 is obsolete).
330 // Run booking and receive the directives partitioned into
331 // `(booked, failed)`. Failed transactions are in pre-booking shape
332 // (unresolved cost specs, unfilled elided slots, possibly
333 // unbalanced); they don't flow into regular plugins or Late
334 // validation — booking already reported the root cause and the
335 // downstream checks would cascade misleading errors. They get
336 // re-merged for the final `Ledger.directives` so the user still
337 // sees their original input.
338 #[cfg(feature = "booking")]
339 let (mut booked, failed): (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) = {
340 let file_set_booking = raw.options.set_options.contains("booking_method");
341 let effective_method = if file_set_booking {
342 raw.options
343 .booking_method
344 .parse()
345 .unwrap_or(options.booking_method)
346 } else {
347 options.booking_method
348 };
349 run_booking(directives, effective_method, &mut errors)
350 };
351 #[cfg(not(feature = "booking"))]
352 let (mut booked, failed): (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) =
353 (directives, Vec::new());
354
355 // 5. Post-booking plugins — file-declared plugins + CLI extras.
356 // Runs AFTER booking so cost-spec-reading plugins
357 // (`implicit_prices`, `capital_gains_classifier`,
358 // `check_average_cost`, `sell_gains`, `unrealized`, `valuation`)
359 // see filled-in `cost.number_per` values. This matches Python
360 // beancount's plugins-after-booking ordering and closes
361 // rustledger#1117. Failed transactions were partitioned out
362 // above; plugins only see successfully-booked input.
363 #[cfg(feature = "plugins")]
364 if options.run_plugins || !options.extra_plugins.is_empty() {
365 run_plugins(
366 &mut booked,
367 &raw.plugins,
368 &raw.options,
369 options,
370 &raw.source_map,
371 &mut errors,
372 PluginPass::PostBooking,
373 )?;
374 }
375
376 // 6. Validation (late phase) — runs on booked + plugin-processed
377 // directives. Reuses the `ValidationSession` from step 2 so
378 // account/commodity/pad bookkeeping carries forward.
379 #[cfg(feature = "validation")]
380 if let Some(mut session) = validation_session {
381 let phase_errors =
382 session.run_phase_spanned(&booked, rustledger_validate::Phase::Late, today);
383 ledger_errors_extend(&mut errors, phase_errors, &raw.source_map);
384 let finalize_errors = session.finalize();
385 ledger_errors_extend(&mut errors, finalize_errors, &raw.source_map);
386 }
387
388 // 7. Re-merge failed transactions back into the directive list
389 // for output. The user wrote them and expects to see them in the
390 // resulting `Ledger.directives`; we just kept them isolated from
391 // post-booking processing. Re-sort to restore canonical display
392 // order (booked retained order during plugin transformation; the
393 // sort restores the failed entries' positions).
394 let mut directives = booked;
395 directives.extend(failed);
396 directives.sort_by_key(|d| (d.value.date(), d.value.priority(), d.file_id, d.span.start));
397
398 Ok(Ledger {
399 directives,
400 options: raw.options,
401 plugins: raw.plugins,
402 source_map: raw.source_map,
403 errors,
404 display_context: raw.display_context,
405 })
406}
407
/// Book and interpolate every transaction, then split the directives into
/// `(booked, failed)`.
///
/// The caller passes `directives` already sorted into canonical display
/// order `(date, priority, file_id, span.start)`. Booking additionally
/// needs cost-reduction transactions to be processed AFTER augmentations
/// that share their `(date, priority)`, so that the lots exist when they
/// are matched. The vec itself is never reordered: it is visited through a
/// temporary index list sorted by `(date, priority, has_cost_reduction)`.
/// The sort is stable, so directives with equal keys keep their relative
/// display order.
///
/// A transaction that fails booking is left untouched (pre-booking shape:
/// unresolved cost specs, unfilled elided slots), a `"BOOK"` error is
/// appended to `errors`, and the directive goes to the second return
/// value. Keeping failures apart stops regular plugins and Late validation
/// from cascading misleading errors; the caller re-merges `failed` into the
/// final `Ledger.directives` so the user still sees the original input.
///
/// Both returned vectors preserve the relative order of the input.
#[cfg(feature = "booking")]
fn run_booking(
    mut directives: Vec<Spanned<Directive>>,
    booking_method: BookingMethod,
    errors: &mut Vec<LedgerError>,
) -> (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) {
    use rustledger_booking::BookingEngine;

    let mut engine = BookingEngine::with_method(booking_method);
    engine.register_account_methods(directives.iter().map(|entry| &entry.value));

    // Visit order for booking: within one `(date, priority)` group the
    // non-reducing directives come first (`false < true`).
    let mut booking_order: Vec<usize> = (0..directives.len()).collect();
    booking_order.sort_by_key(|&idx| {
        let directive = &directives[idx].value;
        (
            directive.date(),
            directive.priority(),
            directive.has_cost_reduction(),
        )
    });

    // One flag per directive position; set when booking that slot failed.
    let mut is_failed = vec![false; directives.len()];
    let mut failed_count = 0_usize;
    for idx in booking_order {
        let Directive::Transaction(txn) = &mut directives[idx].value else {
            continue;
        };
        match engine.book_and_interpolate(txn) {
            Ok(result) => {
                // Feed the booked postings back into the engine before
                // replacing the transaction with its booked form.
                engine.apply(&result.transaction);
                *txn = result.transaction;
            }
            Err(e) => {
                errors.push(LedgerError::error(
                    "BOOK",
                    format!("{} ({}, \"{}\")", e, txn.date, txn.narration),
                ));
                is_failed[idx] = true;
                failed_count += 1;
            }
        }
    }

    // Consume the vec in positional order so both halves keep the input's
    // relative ordering.
    let mut booked = Vec::with_capacity(directives.len() - failed_count);
    let mut failed = Vec::with_capacity(failed_count);
    for (directive, did_fail) in directives.into_iter().zip(is_failed) {
        if did_fail {
            failed.push(directive);
        } else {
            booked.push(directive);
        }
    }
    (booked, failed)
}
485
/// Which subset of plugins to run.
///
/// The loader pipeline calls `run_plugins` twice: once with
/// [`PluginPass::PreBookingSynth`] before the Early validation phase
/// (so synthesizers can inject Opens / Documents that early checks
/// depend on), and once with [`PluginPass::PostBooking`] after booking
/// (so cost-spec-reading plugins like `implicit_prices`,
/// `capital_gains_classifier`, `check_average_cost`, `sell_gains`,
/// `unrealized`, and `valuation` see filled-in `cost.number_per`
/// values).
///
/// Standalone callers (LSP, FFI, tests) that operate on already-booked
/// input should pass [`PluginPass::All`] for the historical single-pass
/// behavior.
///
/// Whether a named plugin counts as a synthesizer is decided by
/// `NativePlugin::is_synth` on the registry entry; names the native
/// registry does not know are treated as non-synth.
#[cfg(feature = "plugins")]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PluginPass {
    /// Only plugins that synthesize directives the Early validator
    /// depends on: `auto_accounts` (synthesizes Open directives) and
    /// the built-in document discovery walker (synthesizes Document
    /// directives the early phase checks for missing files).
    PreBookingSynth,
    /// All file-declared plugins and CLI `extra_plugins`, EXCLUDING
    /// `auto_accounts` and `document_discovery` (those ran pre-booking).
    /// Includes the 28 plugins that don't depend on synth state but
    /// may depend on booked cost specs.
    PostBooking,
    /// Every plugin — historical single-pass behavior. Used by callers
    /// (LSP, FFI, standalone tests) that don't run booking themselves
    /// or that work on already-booked input.
    All,
}
518
519/// Run plugins on directives.
520///
521/// Executes native plugins (and document discovery) on the given directives,
522/// modifying them in-place. Plugin errors are appended to `errors`.
523///
524/// `pass` selects which subset of plugins to run — see [`PluginPass`].
525/// The loader pipeline calls this twice (synth pass before Early,
526/// regular pass after booking). LSP / FFI / standalone callers pass
527/// `PluginPass::All` for the historical behavior.
528#[cfg(feature = "plugins")]
529pub fn run_plugins(
530 directives: &mut Vec<Spanned<Directive>>,
531 file_plugins: &[Plugin],
532 file_options: &Options,
533 options: &LoadOptions,
534 source_map: &SourceMap,
535 errors: &mut Vec<LedgerError>,
536 pass: PluginPass,
537) -> Result<(), ProcessError> {
538 use rustledger_plugin::{
539 DocumentDiscoveryPlugin, NativePlugin, NativePluginRegistry, PluginInput, PluginOptions,
540 };
541
542 // Resolve document directories relative to the main file's directory
543 // Document discovery only runs when run_plugins is true (respects raw mode)
544 let base_dir = source_map
545 .files()
546 .first()
547 .and_then(|f| f.path.parent())
548 .unwrap_or_else(|| std::path::Path::new("."));
549
550 // `document_discovery` is a synthesizer — runs in PreBookingSynth
551 // and All, skipped in PostBooking (it already injected directives
552 // during the synth pass).
553 let run_doc_discovery = matches!(pass, PluginPass::PreBookingSynth | PluginPass::All)
554 && options.run_plugins
555 && !file_options.documents.is_empty();
556 let has_document_dirs = run_doc_discovery;
557 let resolved_documents: Vec<String> = if has_document_dirs {
558 file_options
559 .documents
560 .iter()
561 .map(|d| {
562 let path = std::path::Path::new(d);
563 if path.is_absolute() {
564 d.clone()
565 } else {
566 base_dir.join(path).to_string_lossy().to_string()
567 }
568 })
569 .collect()
570 } else {
571 Vec::new()
572 };
573
574 // Build the native plugin registry up front so we can ask each
575 // plugin whether it's a synthesizer (via `NativePlugin::is_synth`)
576 // during the classification step below. Constructing the registry
577 // is O(n_plugins) and just instantiates the plugin structs; it's
578 // cheap to do before we know whether any plugins will actually
579 // run.
580 let registry = NativePluginRegistry::new();
581
582 // Collect raw plugin names first (we'll resolve them with the registry later)
583 // Tuple: (name, config, force_python)
584 let mut raw_plugins: Vec<(String, Option<String>, bool)> = Vec::new();
585
586 // Classify a plugin by name. Self-classification lives on the
587 // `NativePlugin::is_synth` trait method (see
588 // `rustledger-plugin/src/native/mod.rs`). Plugins not in the
589 // native registry (WASM, Python) default to non-synth — they
590 // run post-booking like file-authored beancount plugins.
591 let is_synth = |name: &str| -> bool { registry.find(name).is_some_and(NativePlugin::is_synth) };
592
593 // The API-level `options.auto_accounts` flag is a synth source.
594 if options.auto_accounts && matches!(pass, PluginPass::PreBookingSynth | PluginPass::All) {
595 raw_plugins.push(("auto_accounts".to_string(), None, false));
596 }
597
598 // File-declared plugins: synth plugins go in PreBookingSynth,
599 // everything else (including the 6 cost-spec-reading ones) goes in
600 // PostBooking. `PluginPass::All` runs everything for standalone
601 // callers (LSP / FFI / tests on already-booked input).
602 if options.run_plugins {
603 for plugin in file_plugins {
604 let synth = is_synth(&plugin.name);
605 let in_pass = match pass {
606 PluginPass::PreBookingSynth => synth,
607 PluginPass::PostBooking => !synth,
608 PluginPass::All => true,
609 };
610 if in_pass {
611 raw_plugins.push((
612 plugin.name.clone(),
613 plugin.config.clone(),
614 plugin.force_python,
615 ));
616 }
617 }
618 }
619
620 // CLI extras: same synth/regular split as file plugins.
621 for (i, plugin_name) in options.extra_plugins.iter().enumerate() {
622 let synth = is_synth(plugin_name);
623 let in_pass = match pass {
624 PluginPass::PreBookingSynth => synth,
625 PluginPass::PostBooking => !synth,
626 PluginPass::All => true,
627 };
628 if in_pass {
629 let config = options.extra_plugin_configs.get(i).cloned().flatten();
630 raw_plugins.push((plugin_name.clone(), config, false));
631 }
632 }
633
634 // Check if we have any work to do - early return before creating registry
635 if raw_plugins.is_empty() && !has_document_dirs {
636 return Ok(());
637 }
638
639 let plugin_options = PluginOptions {
640 operating_currencies: file_options.operating_currency.clone(),
641 title: file_options.title.clone(),
642 };
643
644 // Run document discovery plugin if documents directories are configured.
645 // Each plugin call builds wrappers freshly from the current `directives`,
646 // sends them to the plugin, receives `PluginOp`s, and applies the ops
647 // to update `directives` — spans on `Keep` / `Modify` ops are inherited
648 // from the original `directives` entry by index, so plugin-transformed
649 // directives retain byte-precise source locations.
650 if has_document_dirs {
651 let doc_plugin = DocumentDiscoveryPlugin::new(resolved_documents, base_dir.to_path_buf());
652 let wrappers = build_wrappers(directives, source_map);
653 let input = PluginInput {
654 directives: wrappers,
655 options: plugin_options.clone(),
656 config: None,
657 };
658 let output = doc_plugin.process(input);
659 record_plugin_errors(errors, output.errors, source_map);
660 apply_plugin_ops(directives, output.ops, errors, source_map)?;
661 }
662
663 // Run each plugin (registry was constructed earlier for the
664 // synth classification step).
665 if !raw_plugins.is_empty() {
666 for (raw_name, plugin_config, force_python) in &raw_plugins {
667 // Resolve the plugin name - try direct match first, then prefixed variants.
668 // Skip native resolution when force_python is set (plugin "python:..." prefix).
669 let resolved_name = if *force_python {
670 None
671 } else if registry.find(raw_name).is_some() {
672 Some(raw_name.as_str())
673 } else if let Some(short_name) = raw_name.strip_prefix("beancount.plugins.") {
674 registry.find(short_name).is_some().then_some(short_name)
675 } else if let Some(short_name) = raw_name.strip_prefix("beancount_reds_plugins.") {
676 registry.find(short_name).is_some().then_some(short_name)
677 } else if let Some(short_name) = raw_name.strip_prefix("beancount_lazy_plugins.") {
678 registry.find(short_name).is_some().then_some(short_name)
679 } else {
680 None
681 };
682
683 if let Some(name) = resolved_name
684 && let Some(plugin) = registry.find(name)
685 {
686 let wrappers = build_wrappers(directives, source_map);
687 let input = PluginInput {
688 directives: wrappers,
689 options: plugin_options.clone(),
690 config: plugin_config.clone(),
691 };
692 let output = plugin.process(input);
693 record_plugin_errors(errors, output.errors, source_map);
694 apply_plugin_ops(directives, output.ops, errors, source_map)?;
695 } else {
696 // Not a native plugin — categorize and handle
697 let plugin_path = std::path::Path::new(raw_name);
698 let ext = plugin_path
699 .extension()
700 .and_then(|e| e.to_str())
701 .unwrap_or("")
702 .to_lowercase();
703
704 // The closure is only invoked from inside the wasm-plugins /
705 // python-plugins cfg blocks below. The whole function is
706 // already `#[cfg(feature = "plugins")]`, so this only matters
707 // when `plugins` is enabled but neither child feature is
708 // (e.g. `--features native-plugins`). Allow `unused_variables`
709 // for exactly that configuration. Underscore-prefixing the
710 // binding would have been the wrong fix because we DO call
711 // the closure in builds with one of the features enabled,
712 // which would trip `no_effect_underscore_binding` instead.
713 #[cfg_attr(
714 not(any(feature = "wasm-plugins", feature = "python-plugins")),
715 allow(unused_variables)
716 )]
717 let resolve_path = |name: &str| -> Result<std::path::PathBuf, String> {
718 let p = std::path::Path::new(name);
719 let resolved = if p.is_absolute() {
720 p.to_path_buf()
721 } else {
722 base_dir.join(name)
723 };
724
725 // Path security: prevent plugins from outside the ledger directory
726 if options.path_security
727 && let (Ok(canon_base), Ok(canon_plugin)) =
728 (base_dir.canonicalize(), resolved.canonicalize())
729 && !canon_plugin.starts_with(&canon_base)
730 {
731 return Err(format!(
732 "plugin path '{name}' is outside the ledger directory"
733 ));
734 }
735
736 Ok(resolved)
737 };
738
739 if ext == "wasm" {
740 // WASM plugin
741 #[cfg(feature = "wasm-plugins")]
742 {
743 let wasm_path = match resolve_path(raw_name) {
744 Ok(p) => p,
745 Err(e) => {
746 errors.push(LedgerError::error("PLUGIN", e).with_phase("plugin"));
747 continue;
748 }
749 };
750 let wrappers = build_wrappers(directives, source_map);
751 match run_wasm_plugin(&wasm_path, &wrappers, &plugin_options, plugin_config)
752 {
753 Ok((ops, plugin_errors)) => {
754 for err in plugin_errors {
755 errors.push(err);
756 }
757 apply_plugin_ops(directives, ops, errors, source_map)?;
758 }
759 Err(e) => {
760 errors.push(
761 LedgerError::error(
762 "PLUGIN",
763 format!("WASM plugin {} failed: {e}", wasm_path.display()),
764 )
765 .with_phase("plugin"),
766 );
767 }
768 }
769 }
770 #[cfg(not(feature = "wasm-plugins"))]
771 {
772 errors.push(
773 LedgerError::error(
774 "PLUGIN",
775 format!(
776 "WASM plugin '{raw_name}' requires the wasm-plugins feature",
777 ),
778 )
779 .with_phase("plugin"),
780 );
781 }
782 } else if *force_python
783 || ext == "py"
784 || raw_name.contains(std::path::MAIN_SEPARATOR)
785 || raw_name.contains('.')
786 {
787 // Python module or file-based plugin (or force_python via "python:" prefix)
788 #[cfg(feature = "python-plugins")]
789 {
790 let resolved = match resolve_path(raw_name) {
791 Ok(p) => p,
792 Err(e) => {
793 errors.push(LedgerError::error("PLUGIN", e).with_phase("plugin"));
794 continue;
795 }
796 };
797 let wrappers = build_wrappers(directives, source_map);
798 match run_python_plugin(
799 raw_name,
800 &resolved,
801 base_dir,
802 &wrappers,
803 &plugin_options,
804 plugin_config,
805 ) {
806 Ok((ops, plugin_errors)) => {
807 for err in plugin_errors {
808 errors.push(err);
809 }
810 apply_plugin_ops(directives, ops, errors, source_map)?;
811 }
812 Err(e) => {
813 errors.push(LedgerError::error("E8002", e).with_phase("plugin"));
814 }
815 }
816 }
817 #[cfg(not(feature = "python-plugins"))]
818 {
819 errors.push(
820 LedgerError::error(
821 "E8005",
822 format!(
823 "Python plugin \"{raw_name}\" requires the python-plugins feature",
824 ),
825 )
826 .with_phase("plugin"),
827 );
828 }
829 } else {
830 // Completely unknown plugin name — try to suggest a module path
831 #[cfg(feature = "python-plugins")]
832 {
833 use rustledger_plugin::python::{is_python_available, suggest_module_path};
834 let suggestion = if is_python_available() {
835 suggest_module_path(raw_name)
836 } else {
837 None
838 };
839 if let Some(module_path) = suggestion {
840 errors.push(
841 LedgerError::error(
842 "E8004",
843 format!(
844 "Cannot resolve Python module '{raw_name}'. Replace with: plugin \"{module_path}\""
845 ),
846 )
847 .with_phase("plugin"),
848 );
849 } else {
850 errors.push(
851 LedgerError::error(
852 "E8001",
853 format!("Plugin not found: \"{raw_name}\""),
854 )
855 .with_phase("plugin"),
856 );
857 }
858 }
859 #[cfg(not(feature = "python-plugins"))]
860 {
861 errors.push(
862 LedgerError::error(
863 "E8001",
864 format!("Plugin not found: \"{raw_name}\""),
865 )
866 .with_phase("plugin"),
867 );
868 }
869 }
870 }
871 }
872 }
873
874 // No final wrapper→directive conversion needed: `apply_plugin_ops`
875 // updates `directives` in place after each plugin call, preserving
876 // original spans on Keep/Modify ops. Plugin-synthesized directives
877 // (Insert ops) get `SYNTHESIZED_FILE_ID` and a zero span.
878 Ok(())
879}
880
/// Build a fresh `Vec<DirectiveWrapper>` from the current directives,
/// carrying filename + line number for plugin-side error reporting.
/// Spans don't need to round-trip through the wrappers — the loader
/// preserves them via `apply_plugin_ops` matching on op index.
///
/// A directive whose `file_id` is not in `source_map` gets neither a
/// filename nor a line number. The line number is also omitted when it
/// does not fit in `u32`, instead of being silently truncated.
#[cfg(feature = "plugins")]
fn build_wrappers(
    directives: &[Spanned<Directive>],
    source_map: &SourceMap,
) -> Vec<rustledger_plugin::DirectiveWrapper> {
    use rustledger_plugin::directive_to_wrapper_with_location;

    directives
        .iter()
        .map(|spanned| {
            let (filename, lineno) = match source_map.get(spanned.file_id as usize) {
                Some(file) => {
                    // Only the line is forwarded to plugins; the column is unused.
                    let (line, _col) = file.line_col(spanned.span.start);
                    (
                        Some(file.path.display().to_string()),
                        // Checked narrowing: an out-of-range line becomes
                        // `None` rather than a wrapped, misleading number.
                        u32::try_from(line).ok(),
                    )
                }
                None => (None, None),
            };
            directive_to_wrapper_with_location(&spanned.value, filename, lineno)
        })
        .collect()
}
905
/// Append plugin-reported errors to the ledger's error list.
///
/// Every entry is emitted with code `PLUGIN` and phase `"plugin"`, as an
/// error or a warning according to the plugin's severity.
///
/// When the plugin supplied both `source_file` and `line_number`, an
/// `ErrorLocation` is attached so renderers can point at the offending
/// line:
///
/// * if `source_file` matches a file known to `source_map`, that file's
///   stored path is used;
/// * otherwise the name is kept verbatim as a path (e.g. a synthetic
///   name such as `"<auto_accounts>"`), so the attribution is not lost.
///
/// The column is always reported as 1 here, since no column is read from
/// the plugin error.
#[cfg(feature = "plugins")]
fn record_plugin_errors(
    errors: &mut Vec<LedgerError>,
    plugin_errors: Vec<rustledger_plugin::PluginError>,
    source_map: &SourceMap,
) {
    use rustledger_plugin::PluginErrorSeverity;
    use std::path::{Path, PathBuf};

    errors.extend(plugin_errors.into_iter().map(|plugin_err| {
        let base = match plugin_err.severity {
            PluginErrorSeverity::Error => LedgerError::error("PLUGIN", plugin_err.message),
            PluginErrorSeverity::Warning => LedgerError::warning("PLUGIN", plugin_err.message),
        }
        .with_phase("plugin");

        match (&plugin_err.source_file, plugin_err.line_number) {
            (Some(name), Some(line)) => {
                // Prefer the source map's own path for files it knows about.
                let file = match source_map.get_by_path(Path::new(name)) {
                    Some(known) => known.path.clone(),
                    None => PathBuf::from(name),
                };
                base.with_location(ErrorLocation {
                    file,
                    line: line as usize,
                    column: 1,
                })
            }
            // A location needs both a file and a line; otherwise omit it.
            _ => base,
        }
    }));
}
950
/// Apply a plugin's `Vec<PluginOp>` to `directives` in place.
///
/// # Validation
///
/// The ops must partition the input: every input index has to be named by
/// exactly one `Keep` / `Modify` / `Delete` op. The first violation found
/// (out-of-bounds index or duplicate index, checked in op order, then the
/// lowest omitted index) is reported as a single `PLUGIN` error with phase
/// `"plugin"` in `errors`; `directives` is left untouched and the function
/// returns `Ok(())`.
///
/// # Materialization
///
/// * `Keep(i)` — a clone of `directives[i]`.
/// * `Modify(i, w)` — `w` converted to a directive, carrying the span and
///   `file_id` of `directives[i]`, so transformed directives keep their
///   original source identity for error reporting.
/// * `Insert(w)` — `w` converted to a directive. If `w` has both a
///   `filename` and a `lineno` and the filename matches a file in
///   `source_map`, the directive gets that file's id and an empty span at
///   the start of that line (offset 0 when the line start cannot be
///   resolved). Otherwise it gets `Span::new(0, 0)` and
///   `SYNTHESIZED_FILE_ID` — this includes plugin-only attributions such
///   as `"<auto_accounts>"`, which never match a loaded file.
/// * `Delete(i)` — the input directive is dropped.
///
/// # Errors
///
/// Returns `ProcessError::PluginConversion` if a `Modify` or `Insert`
/// wrapper cannot be converted back into a directive. `directives` is only
/// replaced after every op has been materialized, so it is unchanged in
/// that case.
#[cfg(feature = "plugins")]
fn apply_plugin_ops(
    directives: &mut Vec<Spanned<Directive>>,
    ops: Vec<rustledger_plugin::PluginOp>,
    errors: &mut Vec<LedgerError>,
    source_map: &SourceMap,
) -> Result<(), ProcessError> {
    use rustledger_parser::{SYNTHESIZED_FILE_ID, Span};
    use rustledger_plugin::PluginOp;
    use rustledger_plugin::wrapper_to_directive;

    let n = directives.len();

    // Validate: every input index in {Keep, Modify, Delete} exactly once.
    let mut seen = vec![false; n];
    let mut violation: Option<String> = None;
    for op in &ops {
        let i = match op {
            PluginOp::Keep(i) | PluginOp::Modify(i, _) | PluginOp::Delete(i) => *i,
            // Inserts do not consume an input index.
            PluginOp::Insert(_) => continue,
        };
        if i >= n {
            violation = Some(format!(
                "plugin op references out-of-bounds input index {i} (input has {n} directives)"
            ));
            break;
        }
        // Mark the index and learn whether it had been claimed already.
        if std::mem::replace(&mut seen[i], true) {
            violation = Some(format!(
                "plugin op references input index {i} more than once"
            ));
            break;
        }
    }
    if violation.is_none() {
        violation = seen.iter().position(|&was_seen| !was_seen).map(|i| {
            format!(
                "plugin omitted input directive {i} (must appear in exactly one of Keep/Modify/Delete)"
            )
        });
    }
    if let Some(message) = violation {
        // Protocol violations are reported, not fatal: keep the input as-is.
        errors.push(LedgerError::error("PLUGIN", message).with_phase("plugin"));
        return Ok(());
    }

    // Materialize new directives, preserving spans for Keep/Modify.
    // Indexing is in-bounds: validation above checked every index.
    let mut new_directives = Vec::with_capacity(ops.len());
    for op in ops {
        match op {
            PluginOp::Keep(i) => {
                new_directives.push(directives[i].clone());
            }
            PluginOp::Modify(i, wrapper) => {
                let directive = wrapper_to_directive(&wrapper)
                    .map_err(|e| ProcessError::PluginConversion(e.to_string()))?;
                new_directives.push(Spanned {
                    value: directive,
                    span: directives[i].span,
                    file_id: directives[i].file_id,
                });
            }
            PluginOp::Insert(wrapper) => {
                // Location used when the wrapper cannot be tied to a
                // loaded source file.
                let synthesized = (Span::new(0, 0), SYNTHESIZED_FILE_ID);
                let (span, file_id) = match (&wrapper.filename, wrapper.lineno) {
                    (Some(filename), Some(lineno)) => {
                        match source_map.get_by_path(std::path::Path::new(filename)) {
                            Some(file) => {
                                let span_start = file.line_start(lineno as usize).unwrap_or(0);
                                (Span::new(span_start, span_start), file.id as u16)
                            }
                            None => synthesized,
                        }
                    }
                    _ => synthesized,
                };
                let directive = wrapper_to_directive(&wrapper)
                    .map_err(|e| ProcessError::PluginConversion(e.to_string()))?;
                new_directives.push(Spanned::new(directive, span).with_file_id(file_id as usize));
            }
            PluginOp::Delete(_) => {}
        }
    }

    *directives = new_directives;
    Ok(())
}
1077
/// Translate loader-level file options into a
/// [`rustledger_validate::ValidationOptions`].
///
/// Both validation phases in `process()` are meant to share this
/// configuration. Carried over are: the account type names, the document
/// directories, and the three tolerance settings
/// (`infer_tolerance_from_cost`, `inferred_tolerance_multiplier`,
/// `inferred_tolerance_default`).
///
/// Relative document directories are anchored at the parent directory of
/// the first file in `source_map` (falling back to `"."` when there is no
/// such parent); absolute ones are used unchanged.
#[cfg(feature = "validation")]
fn build_validation_options(
    file_options: &Options,
    source_map: &SourceMap,
) -> rustledger_validate::ValidationOptions {
    use std::path::{Path, PathBuf};

    // Directory of the main (first-loaded) file.
    let main_file_dir: &Path = match source_map.files().first().and_then(|f| f.path.parent()) {
        Some(dir) => dir,
        None => Path::new("."),
    };

    let mut document_dirs: Vec<PathBuf> = Vec::new();
    for entry in file_options.documents.iter() {
        let candidate = Path::new(entry);
        let resolved = if candidate.is_absolute() {
            candidate.to_path_buf()
        } else {
            main_file_dir.join(candidate)
        };
        document_dirs.push(resolved);
    }

    let account_type_names: Vec<String> = file_options
        .account_types()
        .iter()
        .map(|name| (*name).to_string())
        .collect();

    let tolerance_default = file_options.inferred_tolerance_default.clone();

    rustledger_validate::ValidationOptions::default()
        .with_account_types(account_type_names)
        .with_document_dirs(document_dirs)
        .with_infer_tolerance_from_cost(file_options.infer_tolerance_from_cost)
        .with_tolerance_multiplier(file_options.inferred_tolerance_multiplier)
        .with_inferred_tolerance_default(tolerance_default)
}
1127
/// Append validation errors to `errors` as loader-level [`LedgerError`]s.
///
/// For each [`rustledger_validate::ValidationError`]:
///
/// * the phase is `"parse"` when the error code is a parse-phase code,
///   `"validate"` otherwise;
/// * the severity is a warning when the code is a warning code, an error
///   otherwise;
/// * an advisory note, if present, is appended to the message so that it
///   reaches every output format without a dedicated field;
/// * when both a span and a file id are present and the file id is known
///   to `source_map`, the span start is resolved to a
///   `file:line:column` location (issue #901).
///
/// Shared by both validation phases in `process()`.
#[cfg(feature = "validation")]
fn ledger_errors_extend(
    errors: &mut Vec<LedgerError>,
    validation_errors: Vec<rustledger_validate::ValidationError>,
    source_map: &SourceMap,
) {
    errors.extend(validation_errors.into_iter().map(|err| {
        let severity = if err.code.is_warning() {
            ErrorSeverity::Warning
        } else {
            ErrorSeverity::Error
        };
        let phase = if err.code.is_parse_phase() {
            "parse"
        } else {
            "validate"
        };

        let message = if let Some(note) = &err.note {
            format!("{err}\n note: {note}")
        } else {
            err.to_string()
        };

        // A location needs a span, a file id, and a matching source file.
        let location = err.span.zip(err.file_id).and_then(|(span, fid)| {
            let file = source_map.get(fid as usize)?;
            let (line, column) = file.line_col(span.start);
            Some(ErrorLocation {
                file: file.path.clone(),
                line,
                column,
            })
        });

        LedgerError {
            severity,
            code: err.code.code().to_string(),
            message,
            location,
            source_span: err.span.map(|s| (s.start, s.end)),
            file_id: err.file_id,
            phase: phase.to_string(),
        }
    }));
}
1182
1183/// Load and fully process a beancount file.
1184///
1185/// This is the main entry point, equivalent to Python's `loader.load_file()`.
1186/// It performs: parse → sort → synth-plugins → Early → book → regular-plugins → Late → finalize.
1187///
1188/// # Example
1189///
1190/// ```ignore
1191/// use rustledger_loader::{load, LoadOptions};
1192/// use std::path::Path;
1193///
1194/// let ledger = load(Path::new("ledger.beancount"), LoadOptions::default())?;
1195/// for error in &ledger.errors {
1196/// eprintln!("{}: {}", error.code, error.message);
1197/// }
1198/// ```
1199pub fn load(path: &Path, options: &LoadOptions) -> Result<Ledger, ProcessError> {
1200 let mut loader = crate::Loader::new();
1201
1202 if options.path_security {
1203 loader = loader.with_path_security(true);
1204 }
1205
1206 let raw = loader.load(path)?;
1207 process(raw, options)
1208}
1209
1210/// Load a beancount file without processing.
1211///
1212/// This returns raw directives without sorting, booking, or plugins.
1213/// Use this when you need the original parse output.
1214pub fn load_raw(path: &Path) -> Result<LoadResult, LoadError> {
1215 crate::Loader::new().load(path)
1216}
1217
/// Execute the WASM plugin at `wasm_path` against `directives`.
///
/// A fresh `PluginManager` is created for the call. On success, returns
/// the ops produced by the plugin together with its reported errors,
/// converted to `PLUGIN`-coded [`LedgerError`]s in the `"plugin"` phase
/// (errors stay errors, warnings stay warnings).
///
/// # Errors
///
/// Returns a message prefixed with `failed to load:` when the module
/// cannot be loaded, or `execution failed:` when running it fails.
#[cfg(feature = "wasm-plugins")]
fn run_wasm_plugin(
    wasm_path: &std::path::Path,
    directives: &[rustledger_plugin::DirectiveWrapper],
    options: &rustledger_plugin::PluginOptions,
    config: &Option<String>,
) -> Result<(Vec<rustledger_plugin::PluginOp>, Vec<LedgerError>), String> {
    use rustledger_plugin::{PluginErrorSeverity, PluginInput, PluginManager};

    let mut manager = PluginManager::new();
    let handle = match manager.load(wasm_path) {
        Ok(handle) => handle,
        Err(e) => return Err(format!("failed to load: {e}")),
    };

    // The plugin input owns its data, so everything is copied in.
    let input = PluginInput {
        directives: directives.to_vec(),
        options: options.clone(),
        config: config.clone(),
    };

    let output = match manager.execute(handle, &input) {
        Ok(output) => output,
        Err(e) => return Err(format!("execution failed: {e}")),
    };

    let reported: Vec<LedgerError> = output
        .errors
        .into_iter()
        .map(|plugin_err| {
            match plugin_err.severity {
                PluginErrorSeverity::Error => LedgerError::error("PLUGIN", plugin_err.message),
                PluginErrorSeverity::Warning => {
                    LedgerError::warning("PLUGIN", plugin_err.message)
                }
            }
            .with_phase("plugin")
        })
        .collect();

    Ok((output.ops, reported))
}
1258
/// Execute a Python plugin through `PythonRuntime`.
///
/// `module_name` is passed to the runtime as-is, with `base_dir` as the
/// base directory. On success, returns the ops produced by the plugin and
/// its reported errors, converted to `PLUGIN`-coded [`LedgerError`]s in
/// the `"plugin"` phase.
///
/// # Errors
///
/// Returns a message when the runtime cannot be created or when the
/// plugin fails to execute. The execution-failure message names the
/// module only when `module_name` does not look like a file (see below).
#[cfg(feature = "python-plugins")]
fn run_python_plugin(
    module_name: &str,
    resolved_path: &std::path::Path,
    base_dir: &std::path::Path,
    directives: &[rustledger_plugin::DirectiveWrapper],
    options: &rustledger_plugin::PluginOptions,
    config: &Option<String>,
) -> Result<(Vec<rustledger_plugin::PluginOp>, Vec<LedgerError>), String> {
    use rustledger_plugin::{PluginErrorSeverity, PluginInput, python::PythonRuntime};
    use std::path::{MAIN_SEPARATOR, Path};

    let runtime = match PythonRuntime::new() {
        Ok(runtime) => runtime,
        Err(e) => return Err(format!("Python runtime unavailable: {e}")),
    };

    let input = PluginInput {
        directives: directives.to_vec(),
        options: options.clone(),
        config: config.clone(),
    };

    // A name is treated as a file when the resolved path exists on disk,
    // when it ends in `.py` (any case), or when it contains a path separator.
    let has_py_extension = || {
        Path::new(module_name)
            .extension()
            .is_some_and(|ext| ext.eq_ignore_ascii_case("py"))
    };
    let looks_like_file =
        resolved_path.exists() || has_py_extension() || module_name.contains(MAIN_SEPARATOR);

    // NOTE(review): file-like and module-like names are both run through
    // `execute_module` with identical arguments; the classification above
    // only selects the wording of the failure message. Confirm whether a
    // dedicated file-based entry point was intended.
    let output = runtime
        .execute_module(module_name, &input, Some(base_dir))
        .map_err(|e| {
            if looks_like_file {
                format!("Python plugin execution failed: {e}")
            } else {
                format!("Python plugin '{module_name}' execution failed: {e}")
            }
        })?;

    let reported: Vec<LedgerError> = output
        .errors
        .into_iter()
        .map(|plugin_err| {
            match plugin_err.severity {
                PluginErrorSeverity::Error => LedgerError::error("PLUGIN", plugin_err.message),
                PluginErrorSeverity::Warning => {
                    LedgerError::warning("PLUGIN", plugin_err.message)
                }
            }
            .with_phase("plugin")
        })
        .collect();

    Ok((output.ops, reported))
}