Skip to main content

rustledger_importer/
wasm.rs

1//! Host loader for WASM-implemented importers (wave 2.3b).
2//!
3//! A [`WasmImporter`] wraps a `.wasm` module and implements the
4//! [`crate::Importer`] trait by serializing inputs to `MessagePack`,
5//! calling into the module via wasmtime, and deserializing outputs.
6//!
7//! # Sandbox model
8//!
9//! Mirrors the existing directive-plugin runtime in
10//! `rustledger-plugin/src/runtime.rs`:
11//!
12//! - No imports allowed (rejected at load time)
13//! - No WASI / filesystem / network / env / syscalls
14//! - Memory limit enforced (default 256 MiB)
15//! - Fuel-based execution time limit (default 30 s)
16//!
17//! The host reads the source file into memory and passes the bytes
18//! via [`ImporterInput::content`]; the WASM importer never opens the
19//! file itself.
20//!
21//! # Required WASM exports
22//!
23//! A WASM importer module must export:
24//!
25//! - `memory` — the standard linear memory
26//! - `alloc(size: u32) -> u32` — allocates `size` bytes, returns pointer
27//! - `metadata() -> u64` — packed `(ptr << 32) | len` of `MessagePack`
28//!   [`MetadataOutput`]. Called once at load.
29//! - `identify(ptr: u32, len: u32) -> u64` — input is msgpack
30//!   [`IdentifyInput`], output is msgpack [`IdentifyOutput`].
31//! - `extract(ptr: u32, len: u32) -> u64` — input is msgpack
32//!   [`ImporterInput`], output is msgpack [`ImporterOutput`].
33//! - `extract_enriched(ptr: u32, len: u32) -> u64` — input is msgpack
34//!   [`ImporterInput`], output is msgpack [`EnrichedImporterOutput`].
35
36use std::path::{Path, PathBuf};
37use std::sync::Arc;
38
39use rustledger_ops::fingerprint::Fingerprint;
40use rustledger_plugin::sandbox::{self, StoreState};
41use rustledger_plugin_types::{
42    EnrichedImporterOutput, IdentifyInput, IdentifyOutput, ImporterInput, ImporterOutput,
43    MetadataOutput, PluginError, PluginErrorSeverity,
44};
45use serde::{Serialize, de::DeserializeOwned};
46use wasmtime::{Engine, Linker, Module, Store};
47
48use crate::config::{CsvConfig, ImporterType};
49use crate::{EnrichedImportResult, ImportResult, Importer, ImporterConfig};
50
51// NOTE on hardcoded caps below: `MAX_OUTPUT_BYTES` and `MAX_INPUT_BYTES`
52// are per-process constants, not per-importer config. They're sized
53// generously (64 MiB each) for any realistic bank-statement import.
54// Per-importer tunability is a v1.0 surface decision; for v0.16-pre the
55// caps are intentionally fixed so the security contract is uniform
56// across all loaded importers regardless of who configured them.
57
/// Upper bound (64 MiB) on the number of bytes the host will copy out
/// of a WASM importer for any single entry-point return. Without it, a
/// buggy or hostile module could report `(any_ptr, u32::MAX)` and make
/// the host attempt a ~4 GiB allocation. 64 MiB comfortably exceeds
/// what a single statement import realistically produces.
const MAX_OUTPUT_BYTES: usize = 64 << 20;

/// Upper bound (64 MiB) on the number of bytes the host will marshal
/// into a WASM importer — the input-side twin of `MAX_OUTPUT_BYTES`.
/// `wasm32` memory is `u32`-addressed, so anything past 4 GiB could
/// never be addressed anyway; the cap is set far lower so an
/// accidentally enormous source file fails fast instead of triggering
/// a runaway allocation.
const MAX_INPUT_BYTES: usize = 64 << 20;
70
/// Per-call resource limits applied to a WASM importer.
///
/// The value is `Copy`, so it is passed by value to every call that
/// builds a sandboxed store.
#[derive(Clone, Copy, Debug)]
pub struct WasmRuntimeConfig {
    /// Memory ceiling for the module, in bytes (default 256 MiB).
    pub max_memory: usize,
    /// Execution-time ceiling, in seconds (default 30). The sandbox
    /// translates this into a fuel budget, at roughly 1M instructions
    /// per second.
    pub max_time_secs: u64,
}
80
81impl Default for WasmRuntimeConfig {
82    fn default() -> Self {
83        Self {
84            // Both fields are aliases of the workspace-wide sandbox
85            // defaults so the importer, the regular plugin path, and
86            // the Python plugin runtime (memory only -- Python opts
87            // out of the time default) can't drift apart silently.
88            max_memory: sandbox::DEFAULT_SANDBOX_MAX_MEMORY,
89            max_time_secs: sandbox::DEFAULT_SANDBOX_MAX_TIME_SECS,
90        }
91    }
92}
93
94/// Errors that can occur loading or invoking a WASM importer.
95#[derive(Debug, thiserror::Error)]
96pub enum WasmImporterError {
97    /// Failed to read the `.wasm` file from disk.
98    #[error("failed to read WASM file {path}: {source}")]
99    Io {
100        /// Path the host tried to read.
101        path: PathBuf,
102        /// Underlying I/O error.
103        source: std::io::Error,
104    },
105    /// Failed to enumerate an entry while scanning a directory for
106    /// `.wasm` files. Distinct from [`Self::Io`] because the entry's
107    /// name is unknown when read fails — only the dir is named.
108    /// Typically permission-denied on a single inode or a broken
109    /// symlink.
110    #[error("failed to enumerate entry in WASM importer directory {dir}: {source}")]
111    DirEntry {
112        /// Directory being scanned.
113        dir: PathBuf,
114        /// Underlying I/O error from `read_dir().next()`.
115        source: std::io::Error,
116    },
117    /// The WASM module is malformed or uses unsupported features.
118    #[error("failed to compile WASM module {path}: {source}")]
119    Compile {
120        /// Path of the module that failed to compile.
121        path: PathBuf,
122        /// Underlying wasmtime compile error.
123        source: anyhow::Error,
124    },
125    /// The WASM module has imports — they're forbidden in the importer
126    /// sandbox. Importers must be self-contained.
127    #[error(
128        "WASM importer has forbidden import {module}::{name} — importers must be self-contained"
129    )]
130    ForbiddenImport {
131        /// Import module namespace (e.g. `env`, `wasi_snapshot_preview1`).
132        module: String,
133        /// Import item name within the module.
134        name: String,
135    },
136    /// A required export is missing.
137    #[error("WASM importer missing required export `{0}`")]
138    MissingExport(&'static str),
139    /// Runtime error during a wasmtime call (trap, fuel exhausted,
140    /// memory limit, etc.).
141    #[error("WASM importer runtime error: {0}")]
142    Runtime(#[source] anyhow::Error),
143    /// `MessagePack` decode error on the WASM-returned bytes.
144    #[error("WASM importer returned malformed MessagePack: {0}")]
145    Decode(#[source] rmp_serde::decode::Error),
146    /// `MessagePack` encode error on the input being sent to the WASM
147    /// importer. Practically only happens if `ImporterConfig` carries
148    /// non-serializable state, which shouldn't.
149    #[error("failed to encode input for WASM importer: {0}")]
150    Encode(#[source] rmp_serde::encode::Error),
151    /// The WASM importer returned an `out_len` larger than the host's
152    /// allocation cap (`MAX_OUTPUT_BYTES`, currently 64 MiB). Either
153    /// the module is buggy/malicious or the cap needs raising for a
154    /// genuinely huge import.
155    #[error("WASM importer returned output of {len} bytes, exceeds cap of {max} bytes")]
156    OutputTooLarge {
157        /// Length the module reported.
158        len: usize,
159        /// Host's enforced cap (`MAX_OUTPUT_BYTES`).
160        max: usize,
161    },
162    /// The input the host tried to marshal exceeds the host's input
163    /// cap (`MAX_INPUT_BYTES`, currently 64 MiB). The host caps
164    /// before a lossy `as u32` cast (wasm32 memory is `u32`-addressed,
165    /// so >4 GiB input would silently truncate).
166    #[error("input of {len} bytes exceeds cap of {max} bytes for WASM importer")]
167    InputTooLarge {
168        /// Length the host attempted to send.
169        len: usize,
170        /// Host's enforced cap (`MAX_INPUT_BYTES`).
171        max: usize,
172    },
173    /// A required export exists but has the wrong signature. Distinct
174    /// from [`Self::MissingExport`] because `validate_module` already
175    /// proved presence at load time — a `get_typed_func` failure
176    /// thereafter is always a type mismatch, not absence.
177    #[error("WASM importer export `{name}` has wrong signature: {source}")]
178    ExportSignatureMismatch {
179        /// Name of the export.
180        name: &'static str,
181        /// Underlying wasmtime type-mismatch error.
182        source: anyhow::Error,
183    },
184    /// The importer does not advertise an ABI version (no
185    /// `__rustledger_abi_version` export). It was built without the
186    /// `wasm_importer_main!` macro or against a `plugin-types` from
187    /// before the ABI handshake existed; the host can't confirm wire
188    /// compatibility and refuses to run it (issue #1234).
189    #[error(
190        "WASM importer has a missing or invalid `{export}` export (expected signature \
191         `() -> u32`): it was built against an incompatible rustledger-plugin-types, or the \
192         export is absent, mistyped, or traps. Host requires ABI v{expected}. Rebuild against \
193         a matching rustledger-plugin-types."
194    )]
195    AbiVersionMissing {
196        /// The export symbol the host looked up.
197        export: &'static str,
198        /// ABI version the host speaks.
199        expected: u32,
200    },
201    /// The importer advertises a different ABI version than the host.
202    /// Running it would risk an opaque trap from a misread wire
203    /// message, so the host rejects it at load (issue #1234).
204    #[error(
205        "WASM importer ABI version mismatch: importer declares v{found}, host requires \
206         v{expected}. Rebuild against a matching rustledger-plugin-types."
207    )]
208    AbiVersionMismatch {
209        /// Version the importer reported.
210        found: u32,
211        /// Version the host requires.
212        expected: u32,
213    },
214}
215
216// `MemoryLimiter`, `StoreState`, `MAX_TABLE_ELEMENTS`, and the
217// `make_sandboxed_store` helper live in `rustledger_plugin::sandbox`
218// so the per-call enforcement is identical between the WASM importer
219// host and the directive-plugin runtime. See sandbox.rs for the
220// rationale + tests.
221
222// Note: no manual `impl From<WasmImporterError> for anyhow::Error` — `anyhow`
223// has a blanket impl for any `std::error::Error + Send + Sync + 'static`,
224// which thiserror's derive already satisfies. Adding our own would conflict.
225
226/// Wrap a `wasmtime::Error` in `WasmImporterError::Runtime`. Function form
227/// (not closure) so call sites stay terse: `.map_err(runtime_err)`.
228#[inline]
229fn runtime_err(e: wasmtime::Error) -> WasmImporterError {
230    WasmImporterError::Runtime(anyhow::Error::from(e))
231}
232
233/// A WASM-loaded importer. Implements [`Importer`] by dispatching to
234/// the loaded module's `extract` / `extract_enriched` entry points.
235///
236/// Cheap to clone — the [`Module`] is shared via `Arc` and the
237/// [`Engine`] is process-wide (see [`rustledger_plugin::sandbox`]).
238/// A fresh wasmtime [`Store`] is created per call, so concurrent
239/// extract calls don't share state.
240#[derive(Clone)]
241pub struct WasmImporter {
242    /// Filesystem path the module was loaded from (for diagnostics).
243    path: PathBuf,
244    /// Module's declared name (from the cached `metadata` call).
245    name: String,
246    /// Module's declared description (from the cached `metadata` call).
247    description: String,
248    /// Compiled module.
249    module: Arc<Module>,
250    /// Shared wasmtime engine — one per process, sourced from the
251    /// workspace's shared sandbox config in `rustledger_plugin`.
252    engine: Arc<Engine>,
253    /// Per-call runtime limits.
254    config: WasmRuntimeConfig,
255}
256
257impl std::fmt::Debug for WasmImporter {
258    /// Hand-rolled to avoid wasmtime's `Module`/`Engine` (whose `Debug`
259    /// outputs are noisy and version-dependent). Prints just the
260    /// host-side metadata that's useful for assertions and logging.
261    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
262        f.debug_struct("WasmImporter")
263            .field("path", &self.path)
264            .field("name", &self.name)
265            .field("description", &self.description)
266            .field("config", &self.config)
267            .finish_non_exhaustive()
268    }
269}
270
271impl WasmImporter {
272    /// Load a WASM importer from a `.wasm` file with default runtime
273    /// limits.
274    pub fn load(path: impl Into<PathBuf>) -> Result<Self, WasmImporterError> {
275        Self::load_with_config(path, WasmRuntimeConfig::default())
276    }
277
278    /// Load a WASM importer with custom runtime limits.
279    pub fn load_with_config(
280        path: impl Into<PathBuf>,
281        config: WasmRuntimeConfig,
282    ) -> Result<Self, WasmImporterError> {
283        let path = path.into();
284        let bytes = std::fs::read(&path).map_err(|source| WasmImporterError::Io {
285            path: path.clone(),
286            source,
287        })?;
288        Self::load_from_bytes(path, &bytes, config)
289    }
290
291    /// Load a WASM importer from in-memory bytes that aren't backed by
292    /// a real file. Use this when shipping `.wasm` modules embedded in
293    /// a binary or generated at runtime — `name_for_diagnostics`
294    /// surfaces in error messages and [`Self::path`] but doesn't have
295    /// to correspond to anything on disk.
296    pub fn load_embedded(
297        name_for_diagnostics: &str,
298        bytes: &[u8],
299    ) -> Result<Self, WasmImporterError> {
300        Self::load_from_bytes(
301            PathBuf::from(name_for_diagnostics),
302            bytes,
303            WasmRuntimeConfig::default(),
304        )
305    }
306
307    /// Load from in-memory WASM bytes — useful for tests and embedders
308    /// that ship the module inside their binary. The `path` is used
309    /// only for diagnostics; see [`Self::load_embedded`] for an
310    /// embedder-friendly wrapper.
311    pub fn load_from_bytes(
312        path: impl Into<PathBuf>,
313        bytes: &[u8],
314        config: WasmRuntimeConfig,
315    ) -> Result<Self, WasmImporterError> {
316        let path = path.into();
317
318        // Process-wide shared engine — amortizes the JIT/cache cost
319        // across all WASM-loaded modules in the workspace, and
320        // applies the same security-locked-down `Config` as the
321        // directive-plugin runtime.
322        let engine = sandbox::shared_engine();
323
324        let module = Module::new(&engine, bytes).map_err(|e| WasmImporterError::Compile {
325            path: path.clone(),
326            source: anyhow::Error::from(e),
327        })?;
328
329        Self::validate_module(&module)?;
330
331        let module = Arc::new(module);
332
333        // Call `metadata` once and cache the result. Importers don't
334        // change name/description across calls; this avoids paying the
335        // wasmtime instantiation cost on every `name()` / `description()`.
336        let metadata = call_metadata(&engine, &module, config)?;
337
338        Ok(Self {
339            path,
340            name: metadata.name,
341            description: metadata.description,
342            module,
343            engine,
344            config,
345        })
346    }
347
348    /// The path the module was loaded from (or the
349    /// `name_for_diagnostics` passed to [`Self::load_embedded`]).
350    #[must_use]
351    pub fn path(&self) -> &Path {
352        &self.path
353    }
354
355    /// The per-call runtime caps this importer was loaded with. Useful
356    /// for diagnostics ("did I hit the host's cap?") — the values
357    /// surfaced in error variants like
358    /// [`WasmImporterError::InputTooLarge::max`] are the same ones
359    /// returned here.
360    #[must_use]
361    pub const fn runtime_config(&self) -> WasmRuntimeConfig {
362        self.config
363    }
364
365    /// Reject imports (sandbox requirement) and check required exports.
366    fn validate_module(module: &Module) -> Result<(), WasmImporterError> {
367        if let Some(import) = module.imports().next() {
368            return Err(WasmImporterError::ForbiddenImport {
369                module: import.module().to_string(),
370                name: import.name().to_string(),
371            });
372        }
373
374        let exports: Vec<_> = module.exports().map(|e| e.name().to_string()).collect();
375        for required in &[
376            "memory",
377            "alloc",
378            "metadata",
379            "identify",
380            "extract",
381            "extract_enriched",
382        ] {
383            if !exports.iter().any(|n| n == required) {
384                return Err(WasmImporterError::MissingExport(required));
385            }
386        }
387        Ok(())
388    }
389
390    /// Wraps a wasmtime call that takes msgpack input and returns
391    /// msgpack output. The WASM module's entry-point convention:
392    /// `fn (ptr: u32, len: u32) -> u64` where the return packs
393    /// `(out_ptr << 32) | out_len`.
394    fn call_msgpack<I: Serialize, O: DeserializeOwned>(
395        &self,
396        entry: &'static str,
397        input: &I,
398    ) -> Result<O, WasmImporterError> {
399        call_msgpack_with(&self.engine, &self.module, self.config, entry, input)
400    }
401}
402
403/// Cap input length before the lossy `as u32` cast — wasm32 memory
404/// is u32-addressed, so >4 GiB input would silently truncate and
405/// corrupt the import. Returns the validated length as `u32` so
406/// callers don't need to repeat the cast.
407const fn validate_input_size(len: usize) -> Result<u32, WasmImporterError> {
408    if len > MAX_INPUT_BYTES {
409        return Err(WasmImporterError::InputTooLarge {
410            len,
411            max: MAX_INPUT_BYTES,
412        });
413    }
414    // Safe: `MAX_INPUT_BYTES` (64 MiB) fits in u32, and `len <= MAX_INPUT_BYTES`.
415    Ok(len as u32)
416}
417
418/// Read a packed `(out_ptr, out_len)` u64 from a WASM entry-point
419/// return, validate `out_len` against [`MAX_OUTPUT_BYTES`], and copy
420/// the bytes out of WASM memory.
421///
422/// Centralized so the cap is enforced uniformly across `metadata`,
423/// `identify`, `extract`, and `extract_enriched`.
424fn read_packed_output(
425    store: &Store<StoreState>,
426    memory: &wasmtime::Memory,
427    packed: u64,
428) -> Result<Vec<u8>, WasmImporterError> {
429    let out_ptr = (packed >> 32) as u32;
430    let out_len = (packed & 0xFFFF_FFFF) as u32 as usize;
431    if out_len > MAX_OUTPUT_BYTES {
432        return Err(WasmImporterError::OutputTooLarge {
433            len: out_len,
434            max: MAX_OUTPUT_BYTES,
435        });
436    }
437    let mut out_bytes = vec![0u8; out_len];
438    memory
439        .read(store, out_ptr as usize, &mut out_bytes)
440        .map_err(|e| WasmImporterError::Runtime(e.into()))?;
441    Ok(out_bytes)
442}
443
444/// Free-form wasmtime call helper. Extracted from `WasmImporter`'s
445/// methods so the load-time `metadata` call can use it before `self`
446/// is fully constructed.
447fn call_msgpack_with<I: Serialize, O: DeserializeOwned>(
448    engine: &Engine,
449    module: &Module,
450    config: WasmRuntimeConfig,
451    entry: &'static str,
452    input: &I,
453) -> Result<O, WasmImporterError> {
454    let input_bytes = rmp_serde::to_vec(input).map_err(WasmImporterError::Encode)?;
455    let input_len = validate_input_size(input_bytes.len())?;
456
457    let mut store = sandbox::make_sandboxed_store(engine, config.max_memory, config.max_time_secs)
458        .map_err(runtime_err)?;
459
460    // No imports at all — full sandbox.
461    let linker = Linker::new(engine);
462    let instance = linker
463        .instantiate(&mut store, module)
464        .map_err(runtime_err)?;
465
466    // For each `get_typed_func` below: `validate_module` already
467    // verified that the export exists at load time, so any error here
468    // is necessarily a signature mismatch (not absence). Surfacing it
469    // as `ExportSignatureMismatch` rather than `MissingExport` saves
470    // guest authors from chasing a misleading "export not found"
471    // error message.
472    // `validate_module` proved `memory` export presence at load time,
473    // so this `expect` documents an invariant rather than guarding a
474    // real failure path. (The variant `MissingExport("memory")` is
475    // reachable only via `validate_module` itself.)
476    let memory = instance
477        .get_memory(&mut store, "memory")
478        .expect("validate_module verified `memory` export at load");
479
480    let alloc = instance
481        .get_typed_func::<u32, u32>(&mut store, "alloc")
482        .map_err(|e| WasmImporterError::ExportSignatureMismatch {
483            name: "alloc",
484            source: anyhow::Error::from(e),
485        })?;
486
487    let input_ptr = alloc.call(&mut store, input_len).map_err(runtime_err)?;
488    memory
489        .write(&mut store, input_ptr as usize, &input_bytes)
490        .map_err(|e| WasmImporterError::Runtime(e.into()))?;
491
492    let func = instance
493        .get_typed_func::<(u32, u32), u64>(&mut store, entry)
494        .map_err(|e| WasmImporterError::ExportSignatureMismatch {
495            name: entry,
496            source: anyhow::Error::from(e),
497        })?;
498
499    let packed = func
500        .call(&mut store, (input_ptr, input_len))
501        .map_err(runtime_err)?;
502
503    let out_bytes = read_packed_output(&store, &memory, packed)?;
504    rmp_serde::from_slice(&out_bytes).map_err(WasmImporterError::Decode)
505}
506
507/// Special-case helper for the no-input `metadata` entry point. The
508/// WASM convention is `fn metadata() -> u64` returning the packed
509/// `(ptr, len)` of msgpack-encoded [`MetadataOutput`].
510fn call_metadata(
511    engine: &Engine,
512    module: &Module,
513    config: WasmRuntimeConfig,
514) -> Result<MetadataOutput, WasmImporterError> {
515    let mut store = sandbox::make_sandboxed_store(engine, config.max_memory, config.max_time_secs)
516        .map_err(runtime_err)?;
517
518    let linker = Linker::new(engine);
519    let instance = linker
520        .instantiate(&mut store, module)
521        .map_err(runtime_err)?;
522
523    // ABI handshake. `metadata` is the first thing the host calls on a
524    // freshly loaded importer, so this is the natural load-time gate:
525    // an importer built against an incompatible plugin-types is
526    // rejected here with a clear error instead of trapping opaquely
527    // inside a later `extract` (issue #1234). Subsequent identify /
528    // extract calls re-instantiate the same already-verified module,
529    // so they don't repeat the check.
530    match sandbox::check_guest_abi(&instance, &mut store) {
531        sandbox::AbiCheck::Match => {}
532        sandbox::AbiCheck::Missing => {
533            return Err(WasmImporterError::AbiVersionMissing {
534                export: rustledger_plugin_types::ABI_VERSION_EXPORT,
535                expected: sandbox::HOST_ABI_VERSION,
536            });
537        }
538        sandbox::AbiCheck::Mismatch { found } => {
539            return Err(WasmImporterError::AbiVersionMismatch {
540                found,
541                expected: sandbox::HOST_ABI_VERSION,
542            });
543        }
544    }
545
546    // Invariant: `validate_module` verified `memory` at load time.
547    let memory = instance
548        .get_memory(&mut store, "memory")
549        .expect("validate_module verified `memory` export at load");
550
551    // Same reasoning as in `call_msgpack_with`: validate_module
552    // proved presence, so a typed_func error is a signature mismatch.
553    let metadata = instance
554        .get_typed_func::<(), u64>(&mut store, "metadata")
555        .map_err(|e| WasmImporterError::ExportSignatureMismatch {
556            name: "metadata",
557            source: anyhow::Error::from(e),
558        })?;
559
560    let packed = metadata.call(&mut store, ()).map_err(runtime_err)?;
561    let out_bytes = read_packed_output(&store, &memory, packed)?;
562    rmp_serde::from_slice(&out_bytes).map_err(WasmImporterError::Decode)
563}
564
565/// Flatten the host's [`ImporterConfig`] into the wire-format
566/// [`ImporterInput`] expected by the WASM module. A *subset* of
567/// CSV-specific config fields is serialized into the free-form
568/// `options` map — see [`project_csv_config_into_options`] for the
569/// list and what's deferred.
570fn build_wasm_input(path: &Path, content: Vec<u8>, config: &ImporterConfig) -> ImporterInput {
571    let mut options = std::collections::HashMap::new();
572    let ImporterType::Csv(csv) = &config.importer_type;
573    project_csv_config_into_options(csv, &mut options);
574    ImporterInput {
575        path: path.to_string_lossy().into_owned(),
576        content,
577        account: config.account.clone(),
578        currency: config.currency.clone(),
579        options,
580    }
581}
582
583/// Project a *subset* of [`CsvConfig`] into the wire-format `options`
584/// map. String-encoded per the ABI's String→String contract.
585///
586/// # Currently projected
587///
588/// - `date_format`, `delimiter`, `has_header`, `skip_rows`,
589///   `invert_sign`, `skip_zero_amounts` — simple String/bool/number
590/// - `default_expense`, `default_income` — `Option<String>`
591///
592/// # Deferred to wave 2.3e+
593///
594/// The richer fields — `date_column` / `narration_column` /
595/// `payee_column` / `amount_column` / `debit_column` /
596/// `credit_column` (`ColumnSpec` enum: name OR index), `amount_locale`
597/// / `amount_format`, `mappings` / `regex_mappings` (`Vec<(String,
598/// String)>`), `use_merchant_dict` — are not yet projected. Encoding
599/// them in a String→String map needs design decisions (key prefixes,
600/// JSON-in-string, parallel collections?) that are best driven by a
601/// real WASM CSV importer in wave 2.3e rather than guessed now.
602///
603/// A WASM importer in 2.3b can still extract from CSV files; it just
604/// has to implement its own column-spec discovery rather than
605/// inheriting the host's. Most non-CSV importers (OFX, MT940, …)
606/// don't need any of the deferred fields.
607fn project_csv_config_into_options(
608    csv: &CsvConfig,
609    options: &mut std::collections::HashMap<String, String>,
610) {
611    options.insert("date_format".to_string(), csv.date_format.clone());
612    options.insert("delimiter".to_string(), csv.delimiter.to_string());
613    options.insert("has_header".to_string(), csv.has_header.to_string());
614    options.insert("skip_rows".to_string(), csv.skip_rows.to_string());
615    options.insert("invert_sign".to_string(), csv.invert_sign.to_string());
616    options.insert(
617        "skip_zero_amounts".to_string(),
618        csv.skip_zero_amounts.to_string(),
619    );
620    if let Some(de) = &csv.default_expense {
621        options.insert("default_expense".to_string(), de.clone());
622    }
623    if let Some(di) = &csv.default_income {
624        options.insert("default_income".to_string(), di.clone());
625    }
626}
627
628/// Format a [`PluginError`] into a single human-readable line that
629/// preserves the severity ("error" vs "warning") and avoids orphan
630/// colons when location fields are absent.
631///
632/// Examples:
633/// - severity=Error, file="foo.csv", line=42 → `"error foo.csv:42: bad row"`
634/// - severity=Warning, file="foo.csv", line=None → `"warning foo.csv: weird value"`
635/// - severity=Warning, file=None, line=Some(7) → `"warning line 7: weird value"`
636/// - severity=Error, file=None, line=None → `"error: parser bug"`
637fn format_plugin_error(e: &PluginError) -> String {
638    let severity = match e.severity {
639        PluginErrorSeverity::Error => "error",
640        PluginErrorSeverity::Warning => "warning",
641    };
642    let location = match (&e.source_file, e.line_number) {
643        (Some(f), Some(n)) => format!(" {f}:{n}"),
644        (Some(f), None) => format!(" {f}"),
645        (None, Some(n)) => format!(" line {n}"),
646        (None, None) => String::new(),
647    };
648    format!("{severity}{location}: {}", e.message)
649}
650
651/// Materialize an [`ImporterOutput`] wire-format value back to the
652/// host-side [`ImportResult`]. Delegates wrapper→directive conversion
653/// to `rustledger_plugin::convert::wrapper_to_directive` so the WASM
654/// importer path and the directive-plugin path share a single
655/// converter — improvements there land here for free.
656///
657/// # Warning ordering
658///
659/// Warnings are appended in this order:
660///
661/// 1. **Output warnings** — `output.warnings` forwarded verbatim.
662/// 2. **Output errors** — `output.errors`, formatted via
663///    [`format_plugin_error`] so the severity prefix is preserved.
664///
665/// (The enriched analogue [`bridge_enriched_output`] additionally
666/// emits *bridge warnings* first, for per-entry lossy paths that have
667/// no analogue here.)
668fn output_to_import_result(out: ImporterOutput) -> anyhow::Result<ImportResult> {
669    let mut directives = Vec::with_capacity(out.directives.len());
670    for w in out.directives {
671        let d = rustledger_plugin::convert::wrapper_to_directive(&w)
672            .map_err(|e| anyhow::anyhow!("WASM importer returned invalid directive: {e:?}"))?;
673        directives.push(d);
674    }
675    let mut result = ImportResult::new(directives);
676    for w in out.warnings {
677        result = result.with_warning(w);
678    }
679    // Errors and warnings flow through the same `warnings` channel,
680    // but the formatted string preserves the severity prefix so a
681    // fatal-but-recoverable importer error is still distinguishable
682    // from informational chatter. The structured error path
683    // (`LedgerError::location`) is reserved for the loader layer.
684    for e in &out.errors {
685        result = result.with_warning(format_plugin_error(e));
686    }
687    Ok(result)
688}
689
690impl Importer for WasmImporter {
691    fn name(&self) -> &str {
692        &self.name
693    }
694
695    fn description(&self) -> &str {
696        &self.description
697    }
698
699    fn identify(&self, path: &Path) -> bool {
700        let input = IdentifyInput {
701            path: path.to_string_lossy().into_owned(),
702        };
703        // The trait contract is `-> bool` (matches OFX/CSV), so we
704        // can't surface a structured error. But "wrong signature on
705        // `identify`" or "module trapped" are real bugs the guest
706        // author needs to see — emit to stderr so they get a signal
707        // instead of silently never matching. Successful identify
708        // calls are quiet.
709        match self.call_msgpack::<_, IdentifyOutput>("identify", &input) {
710            Ok(out) => out.matches,
711            Err(e) => {
712                eprintln!(
713                    "warning: WASM importer `{}` identify({}) failed: {e}",
714                    self.name,
715                    path.display()
716                );
717                false
718            }
719        }
720    }
721
722    fn extract(&self, path: &Path, config: &ImporterConfig) -> anyhow::Result<ImportResult> {
723        // Use the typed `Io` variant before erasing to anyhow at the
724        // trait boundary — keeps load and extract symmetric on file-
725        // read failures, even though only the typed-error name is
726        // observable to crate-internal callers.
727        let content = std::fs::read(path).map_err(|source| WasmImporterError::Io {
728            path: path.to_path_buf(),
729            source,
730        })?;
731        let input = build_wasm_input(path, content, config);
732        let output: ImporterOutput = self.call_msgpack("extract", &input)?;
733        output_to_import_result(output)
734    }
735
736    fn extract_enriched(
737        &self,
738        path: &Path,
739        config: &ImporterConfig,
740    ) -> anyhow::Result<EnrichedImportResult> {
741        let content = std::fs::read(path).map_err(|source| WasmImporterError::Io {
742            path: path.to_path_buf(),
743            source,
744        })?;
745        let input = build_wasm_input(path, content, config);
746        let output: EnrichedImporterOutput = self.call_msgpack("extract_enriched", &input)?;
747        bridge_enriched_output(output)
748    }
749}
750
751/// Bridge a wire-format [`EnrichedImporterOutput`] into the host's
752/// [`EnrichedImportResult`]. Extracted as a free function so the lossy
753/// paths (unknown method strings, malformed fingerprint hex) can be
754/// unit-tested without standing up wasmtime.
755///
756/// # Warning ordering
757///
758/// Warnings are emitted in this order, which is part of the contract
759/// for any downstream consumer that filters or surfaces them:
760///
761/// 1. **Bridge warnings** (per-entry lossy paths: unknown method,
762///    malformed fingerprint hex) — host-side issues with the wire
763///    data, surface first so the importer author sees them prominently.
764/// 2. **Output warnings** (importer's own informational warnings),
765///    forwarded verbatim from `output.warnings`.
766/// 3. **Output errors** (importer's structured errors), formatted via
767///    [`format_plugin_error`] which preserves severity prefix.
768fn bridge_enriched_output(output: EnrichedImporterOutput) -> anyhow::Result<EnrichedImportResult> {
769    let mut entries = Vec::with_capacity(output.entries.len());
770    let mut bridge_warnings: Vec<String> = Vec::new();
771    for (wrapper, enr) in output.entries {
772        let dir = rustledger_plugin::convert::wrapper_to_directive(&wrapper)
773            .map_err(|e| anyhow::anyhow!("WASM importer returned invalid directive: {e:?}"))?;
774        let method = parse_method(&enr.method).unwrap_or_else(|unknown| {
775            bridge_warnings.push(format!(
776                "warning: WASM importer used unknown categorization method `{unknown}`, falling back to Default"
777            ));
778            rustledger_ops::enrichment::CategorizationMethod::Default
779        });
780        let alternatives = enr
781            .alternatives
782            .into_iter()
783            .map(|a| {
784                let alt_method = parse_method(&a.method).unwrap_or_else(|unknown| {
785                    bridge_warnings.push(format!(
786                        "warning: WASM importer used unknown categorization method `{unknown}` in alternative, falling back to Default"
787                    ));
788                    rustledger_ops::enrichment::CategorizationMethod::Default
789                });
790                rustledger_ops::enrichment::Alternative {
791                    account: a.account,
792                    confidence: a.confidence,
793                    method: alt_method,
794                }
795            })
796            .collect();
797        let fingerprint = match enr.fingerprint {
798            Some(hex) => match Fingerprint::from_hex(&hex) {
799                Ok(fp) => Some(fp),
800                Err(e) => {
801                    bridge_warnings.push(format!(
802                        "warning: WASM importer returned malformed fingerprint hex `{hex}`: {e}"
803                    ));
804                    None
805                }
806            },
807            None => None,
808        };
809        let enrichment = rustledger_ops::enrichment::Enrichment {
810            directive_index: enr.directive_index,
811            confidence: enr.confidence,
812            method,
813            alternatives,
814            fingerprint,
815        };
816        entries.push((dir, enrichment));
817    }
818    let mut enriched = EnrichedImportResult::new(entries);
819    for w in bridge_warnings {
820        enriched = enriched.with_warning(w);
821    }
822    for w in output.warnings {
823        enriched = enriched.with_warning(w);
824    }
825    for e in &output.errors {
826        enriched = enriched.with_warning(format_plugin_error(e));
827    }
828    Ok(enriched)
829}
830
831/// Convert the wire-format method string (as emitted by
832/// `CategorizationMethod::as_meta_value`) back into the host enum.
833///
834/// Returns `Err(unknown)` for strings the host doesn't recognize — the
835/// caller is expected to surface a warning and fall back to
836/// `CategorizationMethod::Default`. We don't silently absorb unknown
837/// strings here: a typo like `"merchant_dict"` vs `"merchant-dict"`
838/// (the exact Copilot-flagged bug from #1130) would otherwise degrade
839/// data without any signal to the user.
840fn parse_method(s: &str) -> Result<rustledger_ops::enrichment::CategorizationMethod, &str> {
841    use rustledger_ops::enrichment::CategorizationMethod;
842    match s {
843        "rule" => Ok(CategorizationMethod::Rule),
844        "merchant-dict" => Ok(CategorizationMethod::MerchantDict),
845        "ml" => Ok(CategorizationMethod::Ml),
846        "llm" => Ok(CategorizationMethod::Llm),
847        "manual" => Ok(CategorizationMethod::Manual),
848        "default" => Ok(CategorizationMethod::Default),
849        unknown => Err(unknown),
850    }
851}
852
853#[cfg(test)]
854mod tests {
855    use super::*;
856
857    #[test]
858    fn wasm_runtime_config_default_is_sensible() {
859        let c = WasmRuntimeConfig::default();
860        // Sourced from the workspace-wide sandbox constants. A future
861        // bump propagates here without manual update; this test pins
862        // that the importer continues to track the shared defaults
863        // rather than drifting to a local literal.
864        assert_eq!(c.max_memory, sandbox::DEFAULT_SANDBOX_MAX_MEMORY);
865        assert_eq!(c.max_time_secs, sandbox::DEFAULT_SANDBOX_MAX_TIME_SECS);
866    }
867
868    #[test]
869    fn validate_module_rejects_module_with_imports() {
870        // A WAT module with a single import — should be rejected.
871        let wat = r#"
872            (module
873                (import "env" "ext" (func $ext))
874                (memory (export "memory") 1)
875                (func (export "alloc") (param i32) (result i32) i32.const 0)
876                (func (export "metadata") (result i64) i64.const 0)
877                (func (export "identify") (param i32 i32) (result i64) i64.const 0)
878                (func (export "extract") (param i32 i32) (result i64) i64.const 0)
879                (func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
880            )
881        "#;
882        let bytes = wat::parse_str(wat).expect("WAT parses");
883        let engine = sandbox::shared_engine();
884        let module = Module::new(&engine, &bytes).unwrap();
885        let err = WasmImporter::validate_module(&module).unwrap_err();
886        assert!(matches!(err, WasmImporterError::ForbiddenImport { .. }));
887    }
888
889    #[test]
890    fn validate_module_rejects_missing_export() {
891        // Has memory + alloc + metadata but missing identify/extract/extract_enriched.
892        let wat = r#"
893            (module
894                (memory (export "memory") 1)
895                (func (export "alloc") (param i32) (result i32) i32.const 0)
896                (func (export "metadata") (result i64) i64.const 0)
897            )
898        "#;
899        let bytes = wat::parse_str(wat).expect("WAT parses");
900        let engine = sandbox::shared_engine();
901        let module = Module::new(&engine, &bytes).unwrap();
902        let err = WasmImporter::validate_module(&module).unwrap_err();
903        assert!(matches!(err, WasmImporterError::MissingExport(_)));
904    }
905
906    #[test]
907    fn parse_method_round_trips_known_values() {
908        use rustledger_ops::enrichment::CategorizationMethod;
909        assert!(matches!(
910            parse_method("rule"),
911            Ok(CategorizationMethod::Rule)
912        ));
913        assert!(matches!(
914            parse_method("merchant-dict"),
915            Ok(CategorizationMethod::MerchantDict)
916        ));
917        assert!(matches!(parse_method("ml"), Ok(CategorizationMethod::Ml)));
918        assert!(matches!(parse_method("llm"), Ok(CategorizationMethod::Llm)));
919        assert!(matches!(
920            parse_method("manual"),
921            Ok(CategorizationMethod::Manual)
922        ));
923        assert!(matches!(
924            parse_method("default"),
925            Ok(CategorizationMethod::Default)
926        ));
927    }
928
929    #[test]
930    fn parse_method_round_trips_via_as_meta_value() {
931        // Pin the contract: every `CategorizationMethod` round-trips
932        // through its `as_meta_value()` string. If a host variant is
933        // added without updating `parse_method`, this test fails.
934        use rustledger_ops::enrichment::CategorizationMethod;
935        for m in [
936            CategorizationMethod::Rule,
937            CategorizationMethod::MerchantDict,
938            CategorizationMethod::Ml,
939            CategorizationMethod::Llm,
940            CategorizationMethod::Manual,
941            CategorizationMethod::Default,
942        ] {
943            let s = m.as_meta_value();
944            let parsed = parse_method(s)
945                .unwrap_or_else(|u| panic!("as_meta_value `{u}` not handled by parse_method"));
946            assert_eq!(parsed, m, "round-trip failed for {m:?}");
947        }
948    }
949
950    #[test]
951    fn parse_method_unknown_surfaces_the_unknown_string() {
952        // Previously: silently fell back to Default. Now: returns
953        // Err(unknown) so the caller can warn — protects against
954        // typos like `merchant_dict` (underscore) vs `merchant-dict`
955        // (hyphen, the actual wire encoding from
956        // `CategorizationMethod::as_meta_value`).
957        assert_eq!(parse_method("future-method"), Err("future-method"));
958        assert_eq!(parse_method("merchant_dict"), Err("merchant_dict"));
959        assert_eq!(parse_method(""), Err(""));
960    }
961
962    #[test]
963    fn format_plugin_error_with_full_location() {
964        let e = PluginError::error("bad row").at("foo.csv", 42);
965        assert_eq!(format_plugin_error(&e), "error foo.csv:42: bad row");
966    }
967
968    #[test]
969    fn format_plugin_error_warning_severity() {
970        let e = PluginError::warning("weird value").at("foo.csv", 42);
971        assert_eq!(format_plugin_error(&e), "warning foo.csv:42: weird value");
972    }
973
974    #[test]
975    fn format_plugin_error_no_location_no_orphan_colon() {
976        let e = PluginError::error("parser bug");
977        // Previously: ": parser bug" (orphan colon). Now: "error: parser bug".
978        assert_eq!(format_plugin_error(&e), "error: parser bug");
979    }
980
981    #[test]
982    fn format_plugin_error_file_only() {
983        let e = PluginError::warning("weird value");
984        let e = PluginError {
985            source_file: Some("foo.csv".to_string()),
986            ..e
987        };
988        assert_eq!(format_plugin_error(&e), "warning foo.csv: weird value");
989    }
990
991    #[test]
992    fn format_plugin_error_line_only_uses_human_phrasing() {
993        // Previously: ":42: weird" (orphan colon). Now: "warning line 42: weird".
994        let e = PluginError::warning("weird");
995        let e = PluginError {
996            line_number: Some(42),
997            ..e
998        };
999        assert_eq!(format_plugin_error(&e), "warning line 42: weird");
1000    }
1001
1002    /// Build a WAT module that pre-loads `MessagePack` outputs for every
1003    /// entry point in low memory and returns hardcoded packed
1004    /// `(ptr, len)` u64s. `alloc` is a bump allocator starting at
1005    /// offset 1024, so host-allocated input never overlaps the
1006    /// pre-loaded data.
1007    ///
1008    /// Wire-format bytes are rmp-serde's default positional encoding
1009    /// (struct → fixarray-N, fields in declaration order).
1010    fn roundtrip_wat() -> &'static str {
1011        r#"
1012        (module
1013            (memory (export "memory") 1)
1014
1015            ;; MetadataOutput { name: "tst", description: "tst" }
1016            ;; 0x92 fixarray-2, 0xa3 fixstr-3 "tst", 0xa3 fixstr-3 "tst"
1017            (data (i32.const 0) "\92\a3tst\a3tst")
1018
1019            ;; IdentifyOutput { matches: true }
1020            ;; 0x91 fixarray-1, 0xc3 true
1021            (data (i32.const 16) "\91\c3")
1022
1023            ;; ImporterOutput { directives: [], warnings: [], errors: [] }
1024            ;; 0x93 fixarray-3, then three 0x90 fixarray-0
1025            (data (i32.const 24) "\93\90\90\90")
1026
1027            ;; EnrichedImporterOutput { entries: [], warnings: [], errors: [] }
1028            (data (i32.const 32) "\93\90\90\90")
1029
1030            ;; bump allocator: hand out at $bump, advance by $size.
1031            ;; NOTE: real importers MUST bounds-check $bump+$size
1032            ;; against current memory and call `memory.grow` (subject
1033            ;; to MemoryLimiter approval). This test fixture skips
1034            ;; that — inputs in the test are small and we declare 1
1035            ;; full page (64 KiB), so the bump never crosses the
1036            ;; boundary.
1037            (global $bump (mut i32) (i32.const 1024))
1038            (func (export "alloc") (param $size i32) (result i32)
1039                (local $ret i32)
1040                global.get $bump
1041                local.set $ret
1042                global.get $bump
1043                local.get $size
1044                i32.add
1045                global.set $bump
1046                local.get $ret)
1047
1048            ;; metadata: ptr=0, len=9 → (0<<32) | 9 = 9
1049            (func (export "metadata") (result i64)
1050                i64.const 9)
1051
1052            ;; identify: ptr=16, len=2 → (16<<32) | 2
1053            (func (export "identify") (param i32 i32) (result i64)
1054                i64.const 0x10_0000_0002)
1055
1056            ;; extract: ptr=24, len=4 → (24<<32) | 4
1057            (func (export "extract") (param i32 i32) (result i64)
1058                i64.const 0x18_0000_0004)
1059
1060            ;; extract_enriched: ptr=32, len=4 → (32<<32) | 4
1061            (func (export "extract_enriched") (param i32 i32) (result i64)
1062                i64.const 0x20_0000_0004)
1063
1064            ;; ABI handshake export. Must equal sandbox::HOST_ABI_VERSION
1065            ;; (rustledger_plugin_types::ABI_VERSION = 1). If the ABI
1066            ;; version is ever bumped, this literal moves in lockstep —
1067            ;; the deliberate test update that proves a real guest would
1068            ;; need rebuilding too.
1069            (func (export "__rustledger_abi_version") (result i32)
1070                i32.const 1)
1071        )
1072        "#
1073    }
1074
1075    fn minimal_config() -> ImporterConfig {
1076        ImporterConfig {
1077            account: "Assets:Bank:Checking".to_string(),
1078            currency: Some("USD".to_string()),
1079            importer_type: ImporterType::Csv(CsvConfig::default()),
1080        }
1081    }
1082
1083    /// A WAT importer with every required export present (so
1084    /// `validate_module` passes) but a configurable
1085    /// `__rustledger_abi_version`. `abi_section` is spliced in verbatim,
1086    /// so a caller can omit it entirely to model a pre-handshake guest.
1087    /// `metadata` returns junk on purpose — the ABI check runs before
1088    /// the host ever reads it, so these fixtures never need real
1089    /// `MessagePack`.
1090    fn importer_wat_with_abi(abi_section: &str) -> String {
1091        format!(
1092            r#"
1093            (module
1094                (memory (export "memory") 1)
1095                (func (export "alloc") (param i32) (result i32) i32.const 0)
1096                (func (export "metadata") (result i64) i64.const 0)
1097                (func (export "identify") (param i32 i32) (result i64) i64.const 0)
1098                (func (export "extract") (param i32 i32) (result i64) i64.const 0)
1099                (func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
1100                {abi_section}
1101            )
1102            "#
1103        )
1104    }
1105
1106    /// Issue #1234: an importer that doesn't advertise an ABI version
1107    /// is rejected at load with a clear error, not an opaque trap on
1108    /// the first `extract`.
1109    #[test]
1110    fn load_rejects_importer_missing_abi_version() {
1111        let bytes = wat::parse_str(importer_wat_with_abi("")).expect("WAT parses");
1112        let err = WasmImporter::load_from_bytes(
1113            PathBuf::from("noabi.wasm"),
1114            &bytes,
1115            WasmRuntimeConfig::default(),
1116        )
1117        .expect_err("load must reject an importer with no ABI export");
1118        assert!(
1119            matches!(err, WasmImporterError::AbiVersionMissing { .. }),
1120            "expected AbiVersionMissing, got: {err:?}"
1121        );
1122    }
1123
1124    /// Issue #1234: an importer built against a different ABI version
1125    /// is rejected at load, naming both versions.
1126    #[test]
1127    fn load_rejects_importer_with_mismatched_abi_version() {
1128        // 999 is deliberately not the host ABI version.
1129        let wat = importer_wat_with_abi(
1130            r#"(func (export "__rustledger_abi_version") (result i32) i32.const 999)"#,
1131        );
1132        let bytes = wat::parse_str(wat).expect("WAT parses");
1133        let err = WasmImporter::load_from_bytes(
1134            PathBuf::from("badabi.wasm"),
1135            &bytes,
1136            WasmRuntimeConfig::default(),
1137        )
1138        .expect_err("load must reject an ABI-mismatched importer");
1139        match err {
1140            WasmImporterError::AbiVersionMismatch { found, expected } => {
1141                assert_eq!(found, 999);
1142                assert_eq!(expected, sandbox::HOST_ABI_VERSION);
1143            }
1144            other => panic!("expected AbiVersionMismatch, got: {other:?}"),
1145        }
1146    }
1147
1148    #[test]
1149    fn end_to_end_wat_module_round_trips_all_entry_points() {
1150        let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
1151        let importer = WasmImporter::load_from_bytes(
1152            PathBuf::from("test.wasm"),
1153            &bytes,
1154            WasmRuntimeConfig::default(),
1155        )
1156        .expect("module loads + metadata round-trips");
1157
1158        // metadata was decoded once at load and cached for these
1159        // accessors — proves the MetadataOutput msgpack flowed end to
1160        // end through the host.
1161        assert_eq!(importer.name(), "tst");
1162        assert_eq!(importer.description(), "tst");
1163
1164        // identify round-trip — input ignored, module hardcodes true.
1165        assert!(importer.identify(Path::new("anything.csv")));
1166
1167        // extract + extract_enriched need a real file for std::fs::read.
1168        let tmp = tempfile::NamedTempFile::new().expect("tempfile");
1169        let config = minimal_config();
1170
1171        let result = importer
1172            .extract(tmp.path(), &config)
1173            .expect("extract round-trip");
1174        assert!(result.directives.is_empty());
1175        assert!(result.warnings.is_empty());
1176
1177        let enriched = importer
1178            .extract_enriched(tmp.path(), &config)
1179            .expect("extract_enriched round-trip");
1180        assert!(enriched.entries.is_empty());
1181        assert!(enriched.warnings.is_empty());
1182    }
1183
1184    #[test]
1185    fn oversized_output_is_rejected_before_allocation() {
1186        // Module's metadata() returns out_len = u32::MAX. Without the
1187        // MAX_OUTPUT_BYTES check, the host would attempt a ~4 GiB Vec
1188        // allocation. The check should catch it during load.
1189        let wat = r#"
1190            (module
1191                (memory (export "memory") 1)
1192                (func (export "alloc") (param i32) (result i32) i32.const 0)
1193                ;; metadata: ptr=0, len=u32::MAX
1194                (func (export "metadata") (result i64)
1195                    i64.const 0x0000_0000_ffff_ffff)
1196                (func (export "identify") (param i32 i32) (result i64) i64.const 0)
1197                (func (export "extract") (param i32 i32) (result i64) i64.const 0)
1198                (func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
1199                ;; ABI handshake passes so the oversized-metadata check
1200                ;; downstream is what rejects this module (issue #1234).
1201                (func (export "__rustledger_abi_version") (result i32) i32.const 1)
1202            )
1203        "#;
1204        let bytes = wat::parse_str(wat).expect("WAT parses");
1205        // Can't use `.expect_err(...)` here — `WasmImporter` doesn't
1206        // implement `Debug` (the wasmtime `Module`/`Engine` it holds
1207        // aren't trivially debuggable), so we destructure manually.
1208        let Err(err) = WasmImporter::load_from_bytes(
1209            PathBuf::from("oversized.wasm"),
1210            &bytes,
1211            WasmRuntimeConfig::default(),
1212        ) else {
1213            panic!("oversized metadata output should have been rejected at load");
1214        };
1215        assert!(
1216            matches!(
1217                err,
1218                WasmImporterError::OutputTooLarge { len, max }
1219                    if len == u32::MAX as usize && max == MAX_OUTPUT_BYTES
1220            ),
1221            "expected OutputTooLarge, got {err:?}"
1222        );
1223    }
1224
1225    // Note: `memory_limiter_rejects_grow_above_max` and
1226    // `table_limiter_rejects_grow_above_max` live in
1227    // `rustledger_plugin::sandbox::tests` now that the limiter
1228    // itself was hoisted there. The integration test below
1229    // (`initial_memory_above_cap_is_rejected_via_limiter_wiring`)
1230    // still proves the importer's load path wires it correctly.
1231
1232    #[test]
1233    fn zero_max_time_secs_does_not_starve_fuel() {
1234        // Regression: previously fuel = 0 * 1_000_000 = 0, causing
1235        // immediate trap on first instruction. Now clamped via
1236        // .max(1) so a 0 config still gets enough fuel to complete a
1237        // trivial call.
1238        let config = WasmRuntimeConfig {
1239            max_memory: sandbox::DEFAULT_SANDBOX_MAX_MEMORY,
1240            max_time_secs: 0,
1241        };
1242        let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
1243        // Loading calls metadata(), which is a single i64.const +
1244        // return — well under 1M instructions.
1245        let importer = WasmImporter::load_from_bytes(PathBuf::from("test.wasm"), &bytes, config)
1246            .expect("zero max_time_secs is clamped, not starved");
1247        assert_eq!(importer.name(), "tst");
1248    }
1249
1250    #[test]
1251    fn validate_input_size_accepts_at_cap_and_rejects_above() {
1252        // Exactly at the cap is fine.
1253        assert_eq!(
1254            validate_input_size(MAX_INPUT_BYTES).unwrap(),
1255            MAX_INPUT_BYTES as u32
1256        );
1257        // One byte over is rejected, with the offending length surfaced
1258        // in the error so the user can see how much they overshot.
1259        let err = validate_input_size(MAX_INPUT_BYTES + 1).unwrap_err();
1260        assert!(
1261            matches!(
1262                err,
1263                WasmImporterError::InputTooLarge { len, max }
1264                    if len == MAX_INPUT_BYTES + 1 && max == MAX_INPUT_BYTES
1265            ),
1266            "got: {err:?}"
1267        );
1268    }
1269
1270    #[test]
1271    fn fuel_calc_saturates_instead_of_overflowing() {
1272        // Regression for Copilot #2: u64::MAX max_time_secs would have
1273        // overflowed in release (silent wrap to a tiny number ⇒ fuel
1274        // starvation) and panicked in debug. Saturating_mul caps at
1275        // u64::MAX which set_fuel accepts.
1276        let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
1277        let config = WasmRuntimeConfig {
1278            max_memory: sandbox::DEFAULT_SANDBOX_MAX_MEMORY,
1279            max_time_secs: u64::MAX,
1280        };
1281        // Successful load proves the fuel calc didn't panic and the
1282        // resulting saturated value is acceptable to set_fuel.
1283        let importer = WasmImporter::load_from_bytes(PathBuf::from("test.wasm"), &bytes, config)
1284            .expect("u64::MAX max_time_secs saturates, doesn't overflow");
1285        assert_eq!(importer.name(), "tst");
1286    }
1287
1288    #[test]
1289    fn wrong_signature_export_surfaces_export_signature_mismatch() {
1290        // `metadata` is declared but with the wrong signature
1291        // (returns i32 instead of i64). `validate_module` checks
1292        // presence-by-name only, so this passes validate. The
1293        // signature error surfaces when `call_metadata` tries
1294        // `get_typed_func::<(), u64>`.
1295        let wat = r#"
1296            (module
1297                (memory (export "memory") 1)
1298                (func (export "alloc") (param i32) (result i32) i32.const 0)
1299                ;; WRONG: should be (result i64), declared as (result i32)
1300                (func (export "metadata") (result i32) i32.const 0)
1301                (func (export "identify") (param i32 i32) (result i64) i64.const 0)
1302                (func (export "extract") (param i32 i32) (result i64) i64.const 0)
1303                (func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
1304                ;; Correct ABI so the check passes and the metadata
1305                ;; signature mismatch is what surfaces (issue #1234).
1306                (func (export "__rustledger_abi_version") (result i32) i32.const 1)
1307            )
1308        "#;
1309        let bytes = wat::parse_str(wat).expect("WAT parses");
1310        let Err(err) = WasmImporter::load_from_bytes(
1311            PathBuf::from("badsig.wasm"),
1312            &bytes,
1313            WasmRuntimeConfig::default(),
1314        ) else {
1315            panic!("metadata with wrong signature should be rejected");
1316        };
1317        // Previously: silently surfaced as MissingExport("metadata"),
1318        // which is misleading because the export DOES exist. Now:
1319        // ExportSignatureMismatch names the export and includes the
1320        // wasmtime type-mismatch error in the source chain.
1321        assert!(
1322            matches!(
1323                err,
1324                WasmImporterError::ExportSignatureMismatch {
1325                    name: "metadata",
1326                    ..
1327                }
1328            ),
1329            "expected ExportSignatureMismatch for metadata, got {err:?}"
1330        );
1331    }
1332
1333    #[test]
1334    fn initial_memory_above_cap_is_rejected_via_limiter_wiring() {
1335        // Pins the `store.limiter(|s| &mut s.limiter)` wiring against
1336        // refactor regression. wasmtime calls `memory_growing` for
1337        // both initial allocation and grow — a module declaring 5000
1338        // pages (320 MiB) initial memory with a 64 MiB cap should
1339        // fail to instantiate. If the limiter wiring breaks, this
1340        // test catches it (the direct trait-method test above does
1341        // not).
1342        let wat = r#"
1343            (module
1344                (memory (export "memory") 5000)
1345                (func (export "alloc") (param i32) (result i32) i32.const 0)
1346                (func (export "metadata") (result i64) i64.const 0)
1347                (func (export "identify") (param i32 i32) (result i64) i64.const 0)
1348                (func (export "extract") (param i32 i32) (result i64) i64.const 0)
1349                (func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
1350            )
1351        "#;
1352        let bytes = wat::parse_str(wat).expect("WAT parses");
1353        let config = WasmRuntimeConfig {
1354            max_memory: 64 * 1024 * 1024,
1355            max_time_secs: 30,
1356        };
1357        let Err(err) = WasmImporter::load_from_bytes(PathBuf::from("bigmem.wasm"), &bytes, config)
1358        else {
1359            panic!("module declaring 320 MiB initial memory should be rejected with 64 MiB cap");
1360        };
1361        // wasmtime turns Ok(false) at instantiation into an instantiate
1362        // error, which the host maps to Runtime.
1363        assert!(
1364            matches!(err, WasmImporterError::Runtime(_)),
1365            "expected Runtime (instantiate failed via limiter), got {err:?}"
1366        );
1367    }
1368
1369    // ===== bridge_enriched_output direct tests =====
1370    //
1371    // These exercise the lossy paths (unknown method, malformed
1372    // fingerprint hex, valid fingerprint round-trip) without standing
1373    // up wasmtime — the bridge logic is the testable piece, the
1374    // wasmtime round-trip is covered by the end-to-end WAT test.
1375
1376    use rustledger_plugin_types::{
1377        AlternativeWrapper, DirectiveData, DirectiveWrapper, EnrichmentWrapper, OpenData,
1378    };
1379
1380    fn open_wrapper(account: &str) -> DirectiveWrapper {
1381        DirectiveWrapper {
1382            directive_type: String::new(),
1383            date: "2024-01-01".to_string(),
1384            filename: None,
1385            lineno: None,
1386            data: DirectiveData::Open(OpenData {
1387                account: account.to_string(),
1388                currencies: vec![],
1389                booking: None,
1390                metadata: vec![],
1391            }),
1392        }
1393    }
1394
1395    fn enrichment_wrapper(method: &str, fingerprint: Option<String>) -> EnrichmentWrapper {
1396        EnrichmentWrapper {
1397            directive_index: 0,
1398            confidence: 1.0,
1399            method: method.to_string(),
1400            alternatives: vec![],
1401            fingerprint,
1402        }
1403    }
1404
1405    #[test]
1406    fn bridge_round_trips_valid_fingerprint_hex() {
1407        let fp = Fingerprint::compute("2024-01-01", Some("100"), "coffee");
1408        let hex = fp.to_hex();
1409        let out = EnrichedImporterOutput {
1410            entries: vec![(
1411                open_wrapper("Assets:Bank"),
1412                enrichment_wrapper("rule", Some(hex)),
1413            )],
1414            warnings: vec![],
1415            errors: vec![],
1416        };
1417        let bridged = bridge_enriched_output(out).expect("bridge succeeds");
1418        assert_eq!(bridged.entries.len(), 1);
1419        assert_eq!(
1420            bridged.entries[0].1.fingerprint,
1421            Some(fp),
1422            "fingerprint should round-trip"
1423        );
1424        assert!(bridged.warnings.is_empty(), "no warnings expected");
1425    }
1426
1427    #[test]
1428    fn bridge_warns_on_malformed_fingerprint_hex_and_drops_to_none() {
1429        let out = EnrichedImporterOutput {
1430            entries: vec![(
1431                open_wrapper("Assets:Bank"),
1432                enrichment_wrapper("rule", Some("not-a-valid-hex".to_string())),
1433            )],
1434            warnings: vec![],
1435            errors: vec![],
1436        };
1437        let bridged = bridge_enriched_output(out).expect("bridge succeeds");
1438        assert_eq!(bridged.entries.len(), 1);
1439        assert_eq!(bridged.entries[0].1.fingerprint, None);
1440        // Warning text names the bad hex so the importer author can
1441        // find the bug quickly.
1442        assert_eq!(bridged.warnings.len(), 1);
1443        assert!(
1444            bridged.warnings[0].contains("not-a-valid-hex"),
1445            "warning should name the bad hex: {}",
1446            bridged.warnings[0]
1447        );
1448    }
1449
1450    #[test]
1451    fn bridge_warns_on_unknown_method_and_falls_back_to_default() {
1452        use rustledger_ops::enrichment::CategorizationMethod;
1453        let out = EnrichedImporterOutput {
1454            entries: vec![(
1455                open_wrapper("Assets:Bank"),
1456                enrichment_wrapper("merchant_dict", None), // underscore typo, exact #1130 bug shape
1457            )],
1458            warnings: vec![],
1459            errors: vec![],
1460        };
1461        let bridged = bridge_enriched_output(out).expect("bridge succeeds");
1462        assert_eq!(bridged.entries[0].1.method, CategorizationMethod::Default);
1463        assert_eq!(bridged.warnings.len(), 1);
1464        assert!(
1465            bridged.warnings[0].contains("merchant_dict"),
1466            "warning should name the unknown method: {}",
1467            bridged.warnings[0]
1468        );
1469    }
1470
1471    #[test]
1472    fn bridge_warns_on_unknown_method_in_alternative() {
1473        use rustledger_ops::enrichment::CategorizationMethod;
1474        let mut enr = enrichment_wrapper("rule", None);
1475        enr.alternatives = vec![AlternativeWrapper {
1476            account: "Expenses:Other".to_string(),
1477            confidence: 0.3,
1478            method: "future-method".to_string(),
1479        }];
1480        let out = EnrichedImporterOutput {
1481            entries: vec![(open_wrapper("Assets:Bank"), enr)],
1482            warnings: vec![],
1483            errors: vec![],
1484        };
1485        let bridged = bridge_enriched_output(out).expect("bridge succeeds");
1486        let alt = &bridged.entries[0].1.alternatives[0];
1487        assert_eq!(alt.method, CategorizationMethod::Default);
1488        assert_eq!(bridged.warnings.len(), 1);
1489        assert!(bridged.warnings[0].contains("future-method"));
1490        assert!(
1491            bridged.warnings[0].contains("alternative"),
1492            "warning should distinguish the alternative slot: {}",
1493            bridged.warnings[0]
1494        );
1495    }
1496
1497    #[test]
1498    fn bridge_warning_ordering_is_bridge_then_output_warnings_then_errors() {
1499        // Pins the warning-emission order documented on
1500        // `bridge_enriched_output`. Order matters for downstream
1501        // consumers that filter or surface them.
1502        let out = EnrichedImporterOutput {
1503            entries: vec![(
1504                open_wrapper("Assets:Bank"),
1505                enrichment_wrapper("nonsense", None),
1506            )],
1507            warnings: vec!["informational warning".to_string()],
1508            errors: vec![PluginError::error("structured error").at("foo.csv", 7)],
1509        };
1510        let bridged = bridge_enriched_output(out).expect("bridge succeeds");
1511        assert_eq!(bridged.warnings.len(), 3);
1512        assert!(
1513            bridged.warnings[0].contains("nonsense"),
1514            "first: bridge warning, got {}",
1515            bridged.warnings[0]
1516        );
1517        assert_eq!(
1518            bridged.warnings[1], "informational warning",
1519            "second: output.warnings forwarded verbatim"
1520        );
1521        assert_eq!(
1522            bridged.warnings[2], "error foo.csv:7: structured error",
1523            "third: output.errors via format_plugin_error"
1524        );
1525    }
1526
1527    #[test]
1528    fn output_to_import_result_uses_severity_aware_formatter() {
1529        // Integration test: proves format_plugin_error is actually
1530        // wired into the production path, not just unit-tested in
1531        // isolation. A refactor that switches back to raw format!()
1532        // would regress this.
1533        let out = ImporterOutput {
1534            directives: vec![],
1535            warnings: vec!["plain warning".to_string()],
1536            errors: vec![
1537                PluginError::error("bad row").at("foo.csv", 42),
1538                PluginError::warning("weird value"),
1539            ],
1540        };
1541        let result = output_to_import_result(out).expect("succeeds");
1542        assert_eq!(
1543            result.warnings,
1544            vec![
1545                "plain warning".to_string(),
1546                "error foo.csv:42: bad row".to_string(),
1547                "warning: weird value".to_string(),
1548            ]
1549        );
1550    }
1551
1552    // ===== Accessor / constructor tests =====
1553
1554    #[test]
1555    fn load_embedded_uses_name_as_path_and_default_config() {
1556        let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
1557        let importer =
1558            WasmImporter::load_embedded("inline-test", &bytes).expect("embedded load succeeds");
1559        // The diagnostic name flows through to `path()` so error
1560        // messages and logs identify the embedded module.
1561        assert_eq!(importer.path(), Path::new("inline-test"));
1562        // Default config is used, caller didn't pass one.
1563        assert_eq!(
1564            importer.runtime_config().max_memory,
1565            sandbox::DEFAULT_SANDBOX_MAX_MEMORY
1566        );
1567        assert_eq!(
1568            importer.runtime_config().max_time_secs,
1569            sandbox::DEFAULT_SANDBOX_MAX_TIME_SECS
1570        );
1571        // Metadata still cached as in the standard load path.
1572        assert_eq!(importer.name(), "tst");
1573    }
1574
1575    #[test]
1576    fn runtime_config_returns_the_loaded_config() {
1577        let custom = WasmRuntimeConfig {
1578            max_memory: 128 * 1024 * 1024,
1579            max_time_secs: 60,
1580        };
1581        let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
1582        let importer = WasmImporter::load_from_bytes(PathBuf::from("custom.wasm"), &bytes, custom)
1583            .expect("custom-config load succeeds");
1584        assert_eq!(importer.runtime_config().max_memory, custom.max_memory);
1585        assert_eq!(
1586            importer.runtime_config().max_time_secs,
1587            custom.max_time_secs
1588        );
1589    }
1590
1591    #[test]
1592    fn debug_impl_does_not_panic_and_redacts_wasmtime_types() {
1593        let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
1594        let importer = WasmImporter::load_embedded("dbg-test", &bytes).expect("load succeeds");
1595        let s = format!("{importer:?}");
1596        // Includes host metadata...
1597        assert!(s.contains("WasmImporter"));
1598        assert!(s.contains("dbg-test"));
1599        assert!(s.contains("tst")); // name + description
1600        // ...but doesn't leak wasmtime Module/Engine internals.
1601        assert!(
1602            !s.contains("Module {"),
1603            "Debug should not expand the wasmtime Module: {s}"
1604        );
1605    }
1606}