use std::path::{Path, PathBuf};
use std::sync::Arc;
use rustledger_ops::fingerprint::Fingerprint;
use rustledger_plugin::sandbox::{self, StoreState};
use rustledger_plugin_types::{
EnrichedImporterOutput, IdentifyInput, IdentifyOutput, ImporterInput, ImporterOutput,
MetadataOutput, PluginError, PluginErrorSeverity,
};
use serde::{Serialize, de::DeserializeOwned};
use wasmtime::{Engine, Linker, Module, Store};
use crate::config::{CsvConfig, ImporterType};
use crate::{EnrichedImportResult, ImportResult, Importer, ImporterConfig};
/// Largest guest-reported output length (in bytes) the host will accept.
/// `read_packed_output` rejects anything larger before allocating the
/// host-side copy buffer.
const MAX_OUTPUT_BYTES: usize = 64 * 1024 * 1024;
/// Largest MessagePack-encoded input (in bytes) the host will hand to a guest.
/// Enforced by `validate_input_size`.
const MAX_INPUT_BYTES: usize = 64 * 1024 * 1024;
/// Resource limits applied to every sandboxed store created for an importer.
///
/// Both values are forwarded unchanged to `sandbox::make_sandboxed_store`.
#[derive(Debug, Clone, Copy)]
pub struct WasmRuntimeConfig {
/// Memory cap handed to the sandbox (presumably in bytes — the tests pass
/// `64 * 1024 * 1024` for a "64 MiB cap"; confirm against `sandbox`).
pub max_memory: usize,
/// Execution-time budget handed to the sandbox, in seconds per the field name.
pub max_time_secs: u64,
}
impl Default for WasmRuntimeConfig {
    /// Builds a config from the sandbox crate's default memory and time limits.
    fn default() -> Self {
        let max_memory = sandbox::DEFAULT_SANDBOX_MAX_MEMORY;
        let max_time_secs = sandbox::DEFAULT_SANDBOX_MAX_TIME_SECS;
        Self {
            max_memory,
            max_time_secs,
        }
    }
}
/// Errors produced while loading, validating, or invoking a WASM importer.
#[derive(Debug, thiserror::Error)]
pub enum WasmImporterError {
/// A file could not be read from disk: the `.wasm` module in
/// `load_with_config`, or the input file in `extract` / `extract_enriched`.
#[error("failed to read WASM file {path}: {source}")]
Io {
path: PathBuf,
source: std::io::Error,
},
/// An entry of a WASM importer directory could not be enumerated.
/// (Not constructed in this part of the file.)
#[error("failed to enumerate entry in WASM importer directory {dir}: {source}")]
DirEntry {
dir: PathBuf,
source: std::io::Error,
},
/// `Module::new` rejected the bytes.
#[error("failed to compile WASM module {path}: {source}")]
Compile {
path: PathBuf,
source: anyhow::Error,
},
/// The module declares at least one import; `validate_module` reports the
/// first one it sees.
#[error(
"WASM importer has forbidden import {module}::{name} — importers must be self-contained"
)]
ForbiddenImport {
module: String,
name: String,
},
/// A required export is absent from the module.
#[error("WASM importer missing required export `{0}`")]
MissingExport(&'static str),
/// Store creation, instantiation, a guest call, or a guest-memory access failed.
#[error("WASM importer runtime error: {0}")]
Runtime(#[source] anyhow::Error),
/// The bytes returned by the guest did not decode as the expected MessagePack value.
#[error("WASM importer returned malformed MessagePack: {0}")]
Decode(#[source] rmp_serde::decode::Error),
/// The host-side input could not be serialized to MessagePack.
#[error("failed to encode input for WASM importer: {0}")]
Encode(#[source] rmp_serde::encode::Error),
/// The guest reported an output length above `MAX_OUTPUT_BYTES`.
#[error("WASM importer returned output of {len} bytes, exceeds cap of {max} bytes")]
OutputTooLarge {
len: usize,
max: usize,
},
/// The encoded input is longer than `MAX_INPUT_BYTES`.
#[error("input of {len} bytes exceeds cap of {max} bytes for WASM importer")]
InputTooLarge {
len: usize,
max: usize,
},
/// An export exists but `get_typed_func` rejected its signature.
#[error("WASM importer export `{name}` has wrong signature: {source}")]
ExportSignatureMismatch {
name: &'static str,
source: anyhow::Error,
},
/// `sandbox::check_guest_abi` reported `AbiCheck::Missing` for the ABI-version export.
#[error(
"WASM importer has a missing or invalid `{export}` export (expected signature \
`() -> u32`): it was built against an incompatible rustledger-plugin-types, or the \
export is absent, mistyped, or traps. Host requires ABI v{expected}. Rebuild against \
a matching rustledger-plugin-types."
)]
AbiVersionMissing {
export: &'static str,
expected: u32,
},
/// The guest declared an ABI version different from `sandbox::HOST_ABI_VERSION`.
#[error(
"WASM importer ABI version mismatch: importer declares v{found}, host requires \
v{expected}. Rebuild against a matching rustledger-plugin-types."
)]
AbiVersionMismatch {
found: u32,
expected: u32,
},
}
#[inline]
fn runtime_err(e: wasmtime::Error) -> WasmImporterError {
WasmImporterError::Runtime(anyhow::Error::from(e))
}
/// An importer backed by a compiled WASM module.
///
/// The module is compiled and validated once at load; `call_msgpack_with`
/// creates a fresh sandboxed store and instance for every call.
#[derive(Clone)]
pub struct WasmImporter {
// Path (or diagnostic name for embedded modules) the importer was loaded from.
path: PathBuf,
// Name reported by the guest's `metadata` export at load time.
name: String,
// Description reported by the guest's `metadata` export at load time.
description: String,
// Compiled module, shared between clones.
module: Arc<Module>,
// Engine returned by `sandbox::shared_engine()`.
engine: Arc<Engine>,
// Limits applied to each sandboxed store created for this importer.
config: WasmRuntimeConfig,
}
impl std::fmt::Debug for WasmImporter {
    /// Prints the path, name, description, and config; the wasmtime `module`
    /// and `engine` fields are omitted and the output is marked non-exhaustive.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("WasmImporter");
        builder.field("path", &self.path);
        builder.field("name", &self.name);
        builder.field("description", &self.description);
        builder.field("config", &self.config);
        builder.finish_non_exhaustive()
    }
}
impl WasmImporter {
pub fn load(path: impl Into<PathBuf>) -> Result<Self, WasmImporterError> {
Self::load_with_config(path, WasmRuntimeConfig::default())
}
pub fn load_with_config(
path: impl Into<PathBuf>,
config: WasmRuntimeConfig,
) -> Result<Self, WasmImporterError> {
let path = path.into();
let bytes = std::fs::read(&path).map_err(|source| WasmImporterError::Io {
path: path.clone(),
source,
})?;
Self::load_from_bytes(path, &bytes, config)
}
pub fn load_embedded(
name_for_diagnostics: &str,
bytes: &[u8],
) -> Result<Self, WasmImporterError> {
Self::load_from_bytes(
PathBuf::from(name_for_diagnostics),
bytes,
WasmRuntimeConfig::default(),
)
}
pub fn load_from_bytes(
path: impl Into<PathBuf>,
bytes: &[u8],
config: WasmRuntimeConfig,
) -> Result<Self, WasmImporterError> {
let path = path.into();
let engine = sandbox::shared_engine();
let module = Module::new(&engine, bytes).map_err(|e| WasmImporterError::Compile {
path: path.clone(),
source: anyhow::Error::from(e),
})?;
Self::validate_module(&module)?;
let module = Arc::new(module);
let metadata = call_metadata(&engine, &module, config)?;
Ok(Self {
path,
name: metadata.name,
description: metadata.description,
module,
engine,
config,
})
}
#[must_use]
pub fn path(&self) -> &Path {
&self.path
}
#[must_use]
pub const fn runtime_config(&self) -> WasmRuntimeConfig {
self.config
}
fn validate_module(module: &Module) -> Result<(), WasmImporterError> {
if let Some(import) = module.imports().next() {
return Err(WasmImporterError::ForbiddenImport {
module: import.module().to_string(),
name: import.name().to_string(),
});
}
let exports: Vec<_> = module.exports().map(|e| e.name().to_string()).collect();
for required in &[
"memory",
"alloc",
"metadata",
"identify",
"extract",
"extract_enriched",
] {
if !exports.iter().any(|n| n == required) {
return Err(WasmImporterError::MissingExport(required));
}
}
Ok(())
}
fn call_msgpack<I: Serialize, O: DeserializeOwned>(
&self,
entry: &'static str,
input: &I,
) -> Result<O, WasmImporterError> {
call_msgpack_with(&self.engine, &self.module, self.config, entry, input)
}
}
/// Checks an encoded input length against `MAX_INPUT_BYTES` and narrows it to
/// the `u32` the guest ABI uses for lengths.
///
/// # Errors
/// Returns [`WasmImporterError::InputTooLarge`] when `len` exceeds the cap.
const fn validate_input_size(len: usize) -> Result<u32, WasmImporterError> {
    if len <= MAX_INPUT_BYTES {
        // In range: the cast below only ever sees values up to the cap.
        Ok(len as u32)
    } else {
        Err(WasmImporterError::InputTooLarge {
            len,
            max: MAX_INPUT_BYTES,
        })
    }
}
/// Copies a guest result out of linear memory.
///
/// `packed` carries the guest pointer in its high 32 bits and the byte length
/// in its low 32 bits. The length is checked against `MAX_OUTPUT_BYTES`
/// before the host buffer is allocated.
///
/// # Errors
/// - [`WasmImporterError::OutputTooLarge`] when the reported length exceeds the cap.
/// - [`WasmImporterError::Runtime`] when the memory read fails.
fn read_packed_output(
    store: &Store<StoreState>,
    memory: &wasmtime::Memory,
    packed: u64,
) -> Result<Vec<u8>, WasmImporterError> {
    // Truncating to u32 keeps exactly the low 32 bits (the length half).
    let len = (packed as u32) as usize;
    let offset = ((packed >> 32) as u32) as usize;
    if len > MAX_OUTPUT_BYTES {
        return Err(WasmImporterError::OutputTooLarge {
            len,
            max: MAX_OUTPUT_BYTES,
        });
    }
    let mut buffer = vec![0u8; len];
    match memory.read(store, offset, &mut buffer) {
        Ok(()) => Ok(buffer),
        Err(e) => Err(WasmImporterError::Runtime(e.into())),
    }
}
/// Runs one guest entry point with a MessagePack-encoded argument and decodes
/// its MessagePack result.
///
/// Steps: encode `input`, enforce the input cap, build a fresh sandboxed store
/// and instance, ask the guest's `alloc` for a buffer, copy the input in, call
/// `entry(ptr, len)`, then read and decode the packed `(ptr << 32) | len`
/// result.
///
/// # Errors
/// - [`WasmImporterError::Encode`] / [`WasmImporterError::InputTooLarge`] for the input.
/// - [`WasmImporterError::Runtime`] for store creation, instantiation, traps,
///   or out-of-bounds guest memory access.
/// - [`WasmImporterError::MissingExport`] when the instance has no usable
///   `memory` export.
/// - [`WasmImporterError::ExportSignatureMismatch`] when `alloc` or `entry`
///   has the wrong type.
/// - [`WasmImporterError::OutputTooLarge`] / [`WasmImporterError::Decode`] for the output.
fn call_msgpack_with<I: Serialize, O: DeserializeOwned>(
    engine: &Engine,
    module: &Module,
    config: WasmRuntimeConfig,
    entry: &'static str,
    input: &I,
) -> Result<O, WasmImporterError> {
    let input_bytes = rmp_serde::to_vec(input).map_err(WasmImporterError::Encode)?;
    let input_len = validate_input_size(input_bytes.len())?;
    let mut store = sandbox::make_sandboxed_store(engine, config.max_memory, config.max_time_secs)
        .map_err(runtime_err)?;
    let linker = Linker::new(engine);
    let instance = linker
        .instantiate(&mut store, module)
        .map_err(runtime_err)?;
    // `get_memory` returns `None` when the export named `memory` is not a
    // plain linear memory. Load-time validation may only have checked the
    // export's name, and the module is untrusted, so report an error here
    // rather than panicking the host.
    let memory = instance
        .get_memory(&mut store, "memory")
        .ok_or(WasmImporterError::MissingExport("memory"))?;
    let alloc = instance
        .get_typed_func::<u32, u32>(&mut store, "alloc")
        .map_err(|e| WasmImporterError::ExportSignatureMismatch {
            name: "alloc",
            source: anyhow::Error::from(e),
        })?;
    let input_ptr = alloc.call(&mut store, input_len).map_err(runtime_err)?;
    // `Memory::write` bounds-checks the guest-supplied pointer for us.
    memory
        .write(&mut store, input_ptr as usize, &input_bytes)
        .map_err(|e| WasmImporterError::Runtime(e.into()))?;
    let func = instance
        .get_typed_func::<(u32, u32), u64>(&mut store, entry)
        .map_err(|e| WasmImporterError::ExportSignatureMismatch {
            name: entry,
            source: anyhow::Error::from(e),
        })?;
    let packed = func
        .call(&mut store, (input_ptr, input_len))
        .map_err(runtime_err)?;
    let out_bytes = read_packed_output(&store, &memory, packed)?;
    rmp_serde::from_slice(&out_bytes).map_err(WasmImporterError::Decode)
}
/// Instantiates the module once, verifies the guest ABI version, and calls the
/// argument-less `metadata` export to obtain the importer's name and
/// description.
///
/// # Errors
/// - [`WasmImporterError::Runtime`] for store creation, instantiation, traps,
///   or out-of-bounds guest memory access.
/// - [`WasmImporterError::AbiVersionMissing`] / [`WasmImporterError::AbiVersionMismatch`]
///   from the ABI handshake.
/// - [`WasmImporterError::MissingExport`] when the instance has no usable
///   `memory` export.
/// - [`WasmImporterError::ExportSignatureMismatch`] when `metadata` is not `() -> u64`.
/// - [`WasmImporterError::OutputTooLarge`] / [`WasmImporterError::Decode`] for the output.
fn call_metadata(
    engine: &Engine,
    module: &Module,
    config: WasmRuntimeConfig,
) -> Result<MetadataOutput, WasmImporterError> {
    let mut store = sandbox::make_sandboxed_store(engine, config.max_memory, config.max_time_secs)
        .map_err(runtime_err)?;
    let linker = Linker::new(engine);
    let instance = linker
        .instantiate(&mut store, module)
        .map_err(runtime_err)?;
    // The ABI handshake runs before any other guest export is touched.
    match sandbox::check_guest_abi(&instance, &mut store) {
        sandbox::AbiCheck::Match => {}
        sandbox::AbiCheck::Missing => {
            return Err(WasmImporterError::AbiVersionMissing {
                export: rustledger_plugin_types::ABI_VERSION_EXPORT,
                expected: sandbox::HOST_ABI_VERSION,
            });
        }
        sandbox::AbiCheck::Mismatch { found } => {
            return Err(WasmImporterError::AbiVersionMismatch {
                found,
                expected: sandbox::HOST_ABI_VERSION,
            });
        }
    }
    // `get_memory` returns `None` when the export named `memory` is not a
    // plain linear memory. Load-time validation may only have checked the
    // export's name, and the module is untrusted, so report an error here
    // rather than panicking the host.
    let memory = instance
        .get_memory(&mut store, "memory")
        .ok_or(WasmImporterError::MissingExport("memory"))?;
    let metadata = instance
        .get_typed_func::<(), u64>(&mut store, "metadata")
        .map_err(|e| WasmImporterError::ExportSignatureMismatch {
            name: "metadata",
            source: anyhow::Error::from(e),
        })?;
    let packed = metadata.call(&mut store, ()).map_err(runtime_err)?;
    let out_bytes = read_packed_output(&store, &memory, packed)?;
    rmp_serde::from_slice(&out_bytes).map_err(WasmImporterError::Decode)
}
/// Assembles the [`ImporterInput`] sent to the guest's `extract` entry points.
///
/// The CSV settings from `config` are flattened into the string-keyed
/// `options` map; the path is converted lossily to UTF-8.
fn build_wasm_input(path: &Path, content: Vec<u8>, config: &ImporterConfig) -> ImporterInput {
    let ImporterType::Csv(csv_settings) = &config.importer_type;
    let mut options = std::collections::HashMap::new();
    project_csv_config_into_options(csv_settings, &mut options);

    let path = path.to_string_lossy().into_owned();
    let account = config.account.clone();
    let currency = config.currency.clone();
    ImporterInput {
        path,
        content,
        account,
        currency,
        options,
    }
}
/// Writes the CSV settings into `options` as string key/value pairs.
///
/// Six keys are always written; `default_expense` and `default_income` are
/// written only when set. Existing entries under the same keys are overwritten.
fn project_csv_config_into_options(
    csv: &CsvConfig,
    options: &mut std::collections::HashMap<String, String>,
) {
    let always_present = [
        ("date_format", csv.date_format.clone()),
        ("delimiter", csv.delimiter.to_string()),
        ("has_header", csv.has_header.to_string()),
        ("skip_rows", csv.skip_rows.to_string()),
        ("invert_sign", csv.invert_sign.to_string()),
        ("skip_zero_amounts", csv.skip_zero_amounts.to_string()),
    ];
    for (key, value) in always_present {
        options.insert(key.to_string(), value);
    }

    let only_when_set = [
        ("default_expense", &csv.default_expense),
        ("default_income", &csv.default_income),
    ];
    for (key, maybe_account) in only_when_set {
        if let Some(account) = maybe_account {
            options.insert(key.to_string(), account.clone());
        }
    }
}
/// Renders a structured guest diagnostic as one line:
/// `<severity>[ <location>]: <message>`.
///
/// The location is `file:line`, `file`, or `line N` depending on which parts
/// are present, and is omitted entirely when neither is.
fn format_plugin_error(e: &PluginError) -> String {
    let location = match (e.source_file.as_ref(), e.line_number) {
        (None, None) => String::new(),
        (None, Some(n)) => format!(" line {n}"),
        (Some(f), None) => format!(" {f}"),
        (Some(f), Some(n)) => format!(" {f}:{n}"),
    };
    let severity = match e.severity {
        PluginErrorSeverity::Warning => "warning",
        PluginErrorSeverity::Error => "error",
    };
    format!("{severity}{location}: {}", e.message)
}
/// Converts a guest [`ImporterOutput`] into the host-side [`ImportResult`].
///
/// Directives are converted in order and the first invalid one aborts the
/// conversion. Guest warnings are forwarded verbatim, followed by the guest's
/// structured errors rendered through `format_plugin_error`.
fn output_to_import_result(out: ImporterOutput) -> anyhow::Result<ImportResult> {
    let directives = out
        .directives
        .into_iter()
        .map(|wrapper| {
            rustledger_plugin::convert::wrapper_to_directive(&wrapper)
                .map_err(|e| anyhow::anyhow!("WASM importer returned invalid directive: {e:?}"))
        })
        .collect::<anyhow::Result<Vec<_>>>()?;

    let with_plain_warnings = out
        .warnings
        .into_iter()
        .fold(ImportResult::new(directives), |acc, warning| {
            acc.with_warning(warning)
        });
    let with_all_warnings = out.errors.iter().fold(with_plain_warnings, |acc, err| {
        acc.with_warning(format_plugin_error(err))
    });
    Ok(with_all_warnings)
}
impl Importer for WasmImporter {
    /// Name reported by the guest at load time.
    fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Description reported by the guest at load time.
    fn description(&self) -> &str {
        self.description.as_str()
    }

    /// Asks the guest whether it recognises `path`.
    ///
    /// Only the path string is sent. Any failure of the guest call is written
    /// to stderr as a warning and treated as "does not match".
    fn identify(&self, path: &Path) -> bool {
        let input = IdentifyInput {
            path: path.to_string_lossy().into_owned(),
        };
        self.call_msgpack::<_, IdentifyOutput>("identify", &input)
            .map(|out| out.matches)
            .unwrap_or_else(|e| {
                eprintln!(
                    "warning: WASM importer `{}` identify({}) failed: {e}",
                    self.name,
                    path.display()
                );
                false
            })
    }

    /// Reads `path` on the host, passes its bytes to the guest's `extract`
    /// export, and converts the result.
    fn extract(&self, path: &Path, config: &ImporterConfig) -> anyhow::Result<ImportResult> {
        let content = match std::fs::read(path) {
            Ok(bytes) => bytes,
            Err(source) => {
                return Err(WasmImporterError::Io {
                    path: path.to_path_buf(),
                    source,
                }
                .into());
            }
        };
        let input = build_wasm_input(path, content, config);
        let output = self.call_msgpack::<_, ImporterOutput>("extract", &input)?;
        output_to_import_result(output)
    }

    /// Same as `extract`, but calls the guest's `extract_enriched` export and
    /// bridges the per-directive enrichment data.
    fn extract_enriched(
        &self,
        path: &Path,
        config: &ImporterConfig,
    ) -> anyhow::Result<EnrichedImportResult> {
        let content = match std::fs::read(path) {
            Ok(bytes) => bytes,
            Err(source) => {
                return Err(WasmImporterError::Io {
                    path: path.to_path_buf(),
                    source,
                }
                .into());
            }
        };
        let input = build_wasm_input(path, content, config);
        let output =
            self.call_msgpack::<_, EnrichedImporterOutput>("extract_enriched", &input)?;
        bridge_enriched_output(output)
    }
}
/// Converts a guest [`EnrichedImporterOutput`] into the host-side
/// [`EnrichedImportResult`].
///
/// An invalid directive aborts the conversion. Softer problems (an unknown
/// categorization method on an entry or one of its alternatives, or a
/// malformed fingerprint hex string) are downgraded to warnings: the method
/// falls back to `Default` and the fingerprint to `None`.
///
/// Warnings appear in the result in this order: bridge warnings, the guest's
/// plain warnings, then the guest's structured errors via `format_plugin_error`.
fn bridge_enriched_output(output: EnrichedImporterOutput) -> anyhow::Result<EnrichedImportResult> {
    use rustledger_ops::enrichment::{Alternative, CategorizationMethod, Enrichment};

    let mut notes: Vec<String> = Vec::new();
    let mut bridged = Vec::with_capacity(output.entries.len());

    for (wrapper, guest) in output.entries {
        let directive = match rustledger_plugin::convert::wrapper_to_directive(&wrapper) {
            Ok(d) => d,
            Err(e) => {
                return Err(anyhow::anyhow!(
                    "WASM importer returned invalid directive: {e:?}"
                ));
            }
        };

        let method = match parse_method(&guest.method) {
            Ok(m) => m,
            Err(unknown) => {
                notes.push(format!(
                    "warning: WASM importer used unknown categorization method `{unknown}`, falling back to Default"
                ));
                CategorizationMethod::Default
            }
        };

        let alternatives = guest
            .alternatives
            .into_iter()
            .map(|alt| {
                let method = match parse_method(&alt.method) {
                    Ok(m) => m,
                    Err(unknown) => {
                        notes.push(format!(
                            "warning: WASM importer used unknown categorization method `{unknown}` in alternative, falling back to Default"
                        ));
                        CategorizationMethod::Default
                    }
                };
                Alternative {
                    account: alt.account,
                    confidence: alt.confidence,
                    method,
                }
            })
            .collect();

        let fingerprint = guest
            .fingerprint
            .and_then(|hex| match Fingerprint::from_hex(&hex) {
                Ok(fp) => Some(fp),
                Err(e) => {
                    notes.push(format!(
                        "warning: WASM importer returned malformed fingerprint hex `{hex}`: {e}"
                    ));
                    None
                }
            });

        bridged.push((
            directive,
            Enrichment {
                directive_index: guest.directive_index,
                confidence: guest.confidence,
                method,
                alternatives,
                fingerprint,
            },
        ));
    }

    let mut enriched = EnrichedImportResult::new(bridged);
    for warning in notes.into_iter().chain(output.warnings) {
        enriched = enriched.with_warning(warning);
    }
    for err in &output.errors {
        enriched = enriched.with_warning(format_plugin_error(err));
    }
    Ok(enriched)
}
/// Maps the wire spelling of a categorization method to its enum value.
///
/// Matching is exact and case-sensitive. An unrecognised spelling is handed
/// back unchanged as the `Err` value so the caller can name it in a warning.
fn parse_method(s: &str) -> Result<rustledger_ops::enrichment::CategorizationMethod, &str> {
    use rustledger_ops::enrichment::CategorizationMethod as Method;
    let method = match s {
        "rule" => Method::Rule,
        "merchant-dict" => Method::MerchantDict,
        "ml" => Method::Ml,
        "llm" => Method::Llm,
        "manual" => Method::Manual,
        "default" => Method::Default,
        _ => return Err(s),
    };
    Ok(method)
}
// Unit tests for the WASM importer host. Guest modules are written inline as
// WAT and assembled with `wat::parse_str`, so no prebuilt `.wasm` fixtures are
// needed.
#[cfg(test)]
mod tests {
use super::*;
// The default config mirrors the sandbox crate's default limits.
#[test]
fn wasm_runtime_config_default_is_sensible() {
let c = WasmRuntimeConfig::default();
assert_eq!(c.max_memory, sandbox::DEFAULT_SANDBOX_MAX_MEMORY);
assert_eq!(c.max_time_secs, sandbox::DEFAULT_SANDBOX_MAX_TIME_SECS);
}
// A module with every required export but one import is rejected with
// `ForbiddenImport`.
#[test]
fn validate_module_rejects_module_with_imports() {
let wat = r#"
(module
(import "env" "ext" (func $ext))
(memory (export "memory") 1)
(func (export "alloc") (param i32) (result i32) i32.const 0)
(func (export "metadata") (result i64) i64.const 0)
(func (export "identify") (param i32 i32) (result i64) i64.const 0)
(func (export "extract") (param i32 i32) (result i64) i64.const 0)
(func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
)
"#;
let bytes = wat::parse_str(wat).expect("WAT parses");
let engine = sandbox::shared_engine();
let module = Module::new(&engine, &bytes).unwrap();
let err = WasmImporter::validate_module(&module).unwrap_err();
assert!(matches!(err, WasmImporterError::ForbiddenImport { .. }));
}
// A module lacking `identify`, `extract`, and `extract_enriched` is rejected
// with `MissingExport`.
#[test]
fn validate_module_rejects_missing_export() {
let wat = r#"
(module
(memory (export "memory") 1)
(func (export "alloc") (param i32) (result i32) i32.const 0)
(func (export "metadata") (result i64) i64.const 0)
)
"#;
let bytes = wat::parse_str(wat).expect("WAT parses");
let engine = sandbox::shared_engine();
let module = Module::new(&engine, &bytes).unwrap();
let err = WasmImporter::validate_module(&module).unwrap_err();
assert!(matches!(err, WasmImporterError::MissingExport(_)));
}
// Each of the six known wire spellings parses to its enum variant.
#[test]
fn parse_method_round_trips_known_values() {
use rustledger_ops::enrichment::CategorizationMethod;
assert!(matches!(
parse_method("rule"),
Ok(CategorizationMethod::Rule)
));
assert!(matches!(
parse_method("merchant-dict"),
Ok(CategorizationMethod::MerchantDict)
));
assert!(matches!(parse_method("ml"), Ok(CategorizationMethod::Ml)));
assert!(matches!(parse_method("llm"), Ok(CategorizationMethod::Llm)));
assert!(matches!(
parse_method("manual"),
Ok(CategorizationMethod::Manual)
));
assert!(matches!(
parse_method("default"),
Ok(CategorizationMethod::Default)
));
}
// Guards against drift: whatever `as_meta_value` emits for a variant must
// parse back to that same variant.
#[test]
fn parse_method_round_trips_via_as_meta_value() {
use rustledger_ops::enrichment::CategorizationMethod;
for m in [
CategorizationMethod::Rule,
CategorizationMethod::MerchantDict,
CategorizationMethod::Ml,
CategorizationMethod::Llm,
CategorizationMethod::Manual,
CategorizationMethod::Default,
] {
let s = m.as_meta_value();
let parsed = parse_method(s)
.unwrap_or_else(|u| panic!("as_meta_value `{u}` not handled by parse_method"));
assert_eq!(parsed, m, "round-trip failed for {m:?}");
}
}
// Unknown spellings (including the underscore variant and the empty string)
// come back unchanged in `Err`.
#[test]
fn parse_method_unknown_surfaces_the_unknown_string() {
assert_eq!(parse_method("future-method"), Err("future-method"));
assert_eq!(parse_method("merchant_dict"), Err("merchant_dict"));
assert_eq!(parse_method(""), Err(""));
}
// File and line both present: rendered as ` file:line`.
#[test]
fn format_plugin_error_with_full_location() {
let e = PluginError::error("bad row").at("foo.csv", 42);
assert_eq!(format_plugin_error(&e), "error foo.csv:42: bad row");
}
// Warning severity is rendered with the `warning` prefix.
#[test]
fn format_plugin_error_warning_severity() {
let e = PluginError::warning("weird value").at("foo.csv", 42);
assert_eq!(format_plugin_error(&e), "warning foo.csv:42: weird value");
}
// No location at all: the severity is followed directly by the colon.
#[test]
fn format_plugin_error_no_location_no_orphan_colon() {
let e = PluginError::error("parser bug");
assert_eq!(format_plugin_error(&e), "error: parser bug");
}
// File without a line number: only the file name is shown.
#[test]
fn format_plugin_error_file_only() {
let e = PluginError::warning("weird value");
let e = PluginError {
source_file: Some("foo.csv".to_string()),
..e
};
assert_eq!(format_plugin_error(&e), "warning foo.csv: weird value");
}
// Line number without a file: rendered as `line N`.
#[test]
fn format_plugin_error_line_only_uses_human_phrasing() {
let e = PluginError::warning("weird");
let e = PluginError {
line_number: Some(42),
..e
};
assert_eq!(format_plugin_error(&e), "warning line 42: weird");
}
// WAT source of a guest that answers every entry point with a canned
// MessagePack payload stored in a data segment, and passes the ABI handshake.
fn roundtrip_wat() -> &'static str {
r#"
(module
(memory (export "memory") 1)
;; MetadataOutput { name: "tst", description: "tst" }
;; 0x92 fixarray-2, 0xa3 fixstr-3 "tst", 0xa3 fixstr-3 "tst"
(data (i32.const 0) "\92\a3tst\a3tst")
;; IdentifyOutput { matches: true }
;; 0x91 fixarray-1, 0xc3 true
(data (i32.const 16) "\91\c3")
;; ImporterOutput { directives: [], warnings: [], errors: [] }
;; 0x93 fixarray-3, then three 0x90 fixarray-0
(data (i32.const 24) "\93\90\90\90")
;; EnrichedImporterOutput { entries: [], warnings: [], errors: [] }
(data (i32.const 32) "\93\90\90\90")
;; bump allocator: hand out at $bump, advance by $size.
;; NOTE: real importers MUST bounds-check $bump+$size
;; against current memory and call `memory.grow` (subject
;; to MemoryLimiter approval). This test fixture skips
;; that — inputs in the test are small and we declare 1
;; full page (64 KiB), so the bump never crosses the
;; boundary.
(global $bump (mut i32) (i32.const 1024))
(func (export "alloc") (param $size i32) (result i32)
(local $ret i32)
global.get $bump
local.set $ret
global.get $bump
local.get $size
i32.add
global.set $bump
local.get $ret)
;; metadata: ptr=0, len=9 → (0<<32) | 9 = 9
(func (export "metadata") (result i64)
i64.const 9)
;; identify: ptr=16, len=2 → (16<<32) | 2
(func (export "identify") (param i32 i32) (result i64)
i64.const 0x10_0000_0002)
;; extract: ptr=24, len=4 → (24<<32) | 4
(func (export "extract") (param i32 i32) (result i64)
i64.const 0x18_0000_0004)
;; extract_enriched: ptr=32, len=4 → (32<<32) | 4
(func (export "extract_enriched") (param i32 i32) (result i64)
i64.const 0x20_0000_0004)
;; ABI handshake export. Must equal sandbox::HOST_ABI_VERSION
;; (rustledger_plugin_types::ABI_VERSION = 1). If the ABI
;; version is ever bumped, this literal moves in lockstep —
;; the deliberate test update that proves a real guest would
;; need rebuilding too.
(func (export "__rustledger_abi_version") (result i32)
i32.const 1)
)
"#
}
// Smallest `ImporterConfig` the extract tests need: a CSV importer with
// default CSV settings.
fn minimal_config() -> ImporterConfig {
ImporterConfig {
account: "Assets:Bank:Checking".to_string(),
currency: Some("USD".to_string()),
importer_type: ImporterType::Csv(CsvConfig::default()),
}
}
// Builds WAT for a guest with all required exports stubbed out, splicing
// `abi_section` in verbatim so each test controls the ABI-version export.
fn importer_wat_with_abi(abi_section: &str) -> String {
format!(
r#"
(module
(memory (export "memory") 1)
(func (export "alloc") (param i32) (result i32) i32.const 0)
(func (export "metadata") (result i64) i64.const 0)
(func (export "identify") (param i32 i32) (result i64) i64.const 0)
(func (export "extract") (param i32 i32) (result i64) i64.const 0)
(func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
{abi_section}
)
"#
)
}
// A guest without the ABI-version export fails to load with `AbiVersionMissing`.
#[test]
fn load_rejects_importer_missing_abi_version() {
let bytes = wat::parse_str(importer_wat_with_abi("")).expect("WAT parses");
let err = WasmImporter::load_from_bytes(
PathBuf::from("noabi.wasm"),
&bytes,
WasmRuntimeConfig::default(),
)
.expect_err("load must reject an importer with no ABI export");
assert!(
matches!(err, WasmImporterError::AbiVersionMissing { .. }),
"expected AbiVersionMissing, got: {err:?}"
);
}
// A guest declaring ABI version 999 fails to load with `AbiVersionMismatch`
// carrying both the found and the expected version.
#[test]
fn load_rejects_importer_with_mismatched_abi_version() {
let wat = importer_wat_with_abi(
r#"(func (export "__rustledger_abi_version") (result i32) i32.const 999)"#,
);
let bytes = wat::parse_str(wat).expect("WAT parses");
let err = WasmImporter::load_from_bytes(
PathBuf::from("badabi.wasm"),
&bytes,
WasmRuntimeConfig::default(),
)
.expect_err("load must reject an ABI-mismatched importer");
match err {
WasmImporterError::AbiVersionMismatch { found, expected } => {
assert_eq!(found, 999);
assert_eq!(expected, sandbox::HOST_ABI_VERSION);
}
other => panic!("expected AbiVersionMismatch, got: {other:?}"),
}
}
// Drives metadata, identify, extract, and extract_enriched through the
// canned-response guest and checks each decoded result.
#[test]
fn end_to_end_wat_module_round_trips_all_entry_points() {
let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
let importer = WasmImporter::load_from_bytes(
PathBuf::from("test.wasm"),
&bytes,
WasmRuntimeConfig::default(),
)
.expect("module loads + metadata round-trips");
assert_eq!(importer.name(), "tst");
assert_eq!(importer.description(), "tst");
assert!(importer.identify(Path::new("anything.csv")));
let tmp = tempfile::NamedTempFile::new().expect("tempfile");
let config = minimal_config();
let result = importer
.extract(tmp.path(), &config)
.expect("extract round-trip");
assert!(result.directives.is_empty());
assert!(result.warnings.is_empty());
let enriched = importer
.extract_enriched(tmp.path(), &config)
.expect("extract_enriched round-trip");
assert!(enriched.entries.is_empty());
assert!(enriched.warnings.is_empty());
}
// A guest whose `metadata` reports a length of `u32::MAX` is rejected with
// `OutputTooLarge` at load.
#[test]
fn oversized_output_is_rejected_before_allocation() {
let wat = r#"
(module
(memory (export "memory") 1)
(func (export "alloc") (param i32) (result i32) i32.const 0)
;; metadata: ptr=0, len=u32::MAX
(func (export "metadata") (result i64)
i64.const 0x0000_0000_ffff_ffff)
(func (export "identify") (param i32 i32) (result i64) i64.const 0)
(func (export "extract") (param i32 i32) (result i64) i64.const 0)
(func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
;; ABI handshake passes so the oversized-metadata check
;; downstream is what rejects this module (issue #1234).
(func (export "__rustledger_abi_version") (result i32) i32.const 1)
)
"#;
let bytes = wat::parse_str(wat).expect("WAT parses");
let Err(err) = WasmImporter::load_from_bytes(
PathBuf::from("oversized.wasm"),
&bytes,
WasmRuntimeConfig::default(),
) else {
panic!("oversized metadata output should have been rejected at load");
};
assert!(
matches!(
err,
WasmImporterError::OutputTooLarge { len, max }
if len == u32::MAX as usize && max == MAX_OUTPUT_BYTES
),
"expected OutputTooLarge, got {err:?}"
);
}
// Loading with `max_time_secs = 0` must still succeed.
#[test]
fn zero_max_time_secs_does_not_starve_fuel() {
let config = WasmRuntimeConfig {
max_memory: sandbox::DEFAULT_SANDBOX_MAX_MEMORY,
max_time_secs: 0,
};
let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
let importer = WasmImporter::load_from_bytes(PathBuf::from("test.wasm"), &bytes, config)
.expect("zero max_time_secs is clamped, not starved");
assert_eq!(importer.name(), "tst");
}
// Boundary check: exactly `MAX_INPUT_BYTES` is accepted, one byte more is not.
#[test]
fn validate_input_size_accepts_at_cap_and_rejects_above() {
assert_eq!(
validate_input_size(MAX_INPUT_BYTES).unwrap(),
MAX_INPUT_BYTES as u32
);
let err = validate_input_size(MAX_INPUT_BYTES + 1).unwrap_err();
assert!(
matches!(
err,
WasmImporterError::InputTooLarge { len, max }
if len == MAX_INPUT_BYTES + 1 && max == MAX_INPUT_BYTES
),
"got: {err:?}"
);
}
// Loading with `max_time_secs = u64::MAX` must still succeed.
#[test]
fn fuel_calc_saturates_instead_of_overflowing() {
let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
let config = WasmRuntimeConfig {
max_memory: sandbox::DEFAULT_SANDBOX_MAX_MEMORY,
max_time_secs: u64::MAX,
};
let importer = WasmImporter::load_from_bytes(PathBuf::from("test.wasm"), &bytes, config)
.expect("u64::MAX max_time_secs saturates, doesn't overflow");
assert_eq!(importer.name(), "tst");
}
// A `metadata` export typed `() -> i32` instead of `() -> i64` surfaces as
// `ExportSignatureMismatch { name: "metadata", .. }`.
#[test]
fn wrong_signature_export_surfaces_export_signature_mismatch() {
let wat = r#"
(module
(memory (export "memory") 1)
(func (export "alloc") (param i32) (result i32) i32.const 0)
;; WRONG: should be (result i64), declared as (result i32)
(func (export "metadata") (result i32) i32.const 0)
(func (export "identify") (param i32 i32) (result i64) i64.const 0)
(func (export "extract") (param i32 i32) (result i64) i64.const 0)
(func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
;; Correct ABI so the check passes and the metadata
;; signature mismatch is what surfaces (issue #1234).
(func (export "__rustledger_abi_version") (result i32) i32.const 1)
)
"#;
let bytes = wat::parse_str(wat).expect("WAT parses");
let Err(err) = WasmImporter::load_from_bytes(
PathBuf::from("badsig.wasm"),
&bytes,
WasmRuntimeConfig::default(),
) else {
panic!("metadata with wrong signature should be rejected");
};
assert!(
matches!(
err,
WasmImporterError::ExportSignatureMismatch {
name: "metadata",
..
}
),
"expected ExportSignatureMismatch for metadata, got {err:?}"
);
}
// A module declaring 5000 initial memory pages is loaded under a 64 MiB
// `max_memory`; the load is expected to fail with a `Runtime` error.
#[test]
fn initial_memory_above_cap_is_rejected_via_limiter_wiring() {
let wat = r#"
(module
(memory (export "memory") 5000)
(func (export "alloc") (param i32) (result i32) i32.const 0)
(func (export "metadata") (result i64) i64.const 0)
(func (export "identify") (param i32 i32) (result i64) i64.const 0)
(func (export "extract") (param i32 i32) (result i64) i64.const 0)
(func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
)
"#;
let bytes = wat::parse_str(wat).expect("WAT parses");
let config = WasmRuntimeConfig {
max_memory: 64 * 1024 * 1024,
max_time_secs: 30,
};
let Err(err) = WasmImporter::load_from_bytes(PathBuf::from("bigmem.wasm"), &bytes, config)
else {
panic!("module declaring 320 MiB initial memory should be rejected with 64 MiB cap");
};
assert!(
matches!(err, WasmImporterError::Runtime(_)),
"expected Runtime (instantiate failed via limiter), got {err:?}"
);
}
// Wire types used only by the bridge tests below.
use rustledger_plugin_types::{
AlternativeWrapper, DirectiveData, DirectiveWrapper, EnrichmentWrapper, OpenData,
};
// Builds a minimal `open` directive wrapper for `account`, dated 2024-01-01.
fn open_wrapper(account: &str) -> DirectiveWrapper {
DirectiveWrapper {
directive_type: String::new(),
date: "2024-01-01".to_string(),
filename: None,
lineno: None,
data: DirectiveData::Open(OpenData {
account: account.to_string(),
currencies: vec![],
booking: None,
metadata: vec![],
}),
}
}
// Builds an enrichment wrapper with the given method spelling and optional
// fingerprint hex, no alternatives, index 0, and confidence 1.0.
fn enrichment_wrapper(method: &str, fingerprint: Option<String>) -> EnrichmentWrapper {
EnrichmentWrapper {
directive_index: 0,
confidence: 1.0,
method: method.to_string(),
alternatives: vec![],
fingerprint,
}
}
// A valid fingerprint hex string survives the bridge unchanged and produces
// no warnings.
#[test]
fn bridge_round_trips_valid_fingerprint_hex() {
let fp = Fingerprint::compute("2024-01-01", Some("100"), "coffee");
let hex = fp.to_hex();
let out = EnrichedImporterOutput {
entries: vec![(
open_wrapper("Assets:Bank"),
enrichment_wrapper("rule", Some(hex)),
)],
warnings: vec![],
errors: vec![],
};
let bridged = bridge_enriched_output(out).expect("bridge succeeds");
assert_eq!(bridged.entries.len(), 1);
assert_eq!(
bridged.entries[0].1.fingerprint,
Some(fp),
"fingerprint should round-trip"
);
assert!(bridged.warnings.is_empty(), "no warnings expected");
}
// A malformed fingerprint hex string becomes `None` plus one warning that
// quotes the bad value.
#[test]
fn bridge_warns_on_malformed_fingerprint_hex_and_drops_to_none() {
let out = EnrichedImporterOutput {
entries: vec![(
open_wrapper("Assets:Bank"),
enrichment_wrapper("rule", Some("not-a-valid-hex".to_string())),
)],
warnings: vec![],
errors: vec![],
};
let bridged = bridge_enriched_output(out).expect("bridge succeeds");
assert_eq!(bridged.entries.len(), 1);
assert_eq!(bridged.entries[0].1.fingerprint, None);
assert_eq!(bridged.warnings.len(), 1);
assert!(
bridged.warnings[0].contains("not-a-valid-hex"),
"warning should name the bad hex: {}",
bridged.warnings[0]
);
}
// An unknown method spelling on an entry falls back to `Default` and emits
// one warning naming the spelling.
#[test]
fn bridge_warns_on_unknown_method_and_falls_back_to_default() {
use rustledger_ops::enrichment::CategorizationMethod;
let out = EnrichedImporterOutput {
entries: vec![(
open_wrapper("Assets:Bank"),
enrichment_wrapper("merchant_dict", None), )],
warnings: vec![],
errors: vec![],
};
let bridged = bridge_enriched_output(out).expect("bridge succeeds");
assert_eq!(bridged.entries[0].1.method, CategorizationMethod::Default);
assert_eq!(bridged.warnings.len(), 1);
assert!(
bridged.warnings[0].contains("merchant_dict"),
"warning should name the unknown method: {}",
bridged.warnings[0]
);
}
// An unknown method spelling inside an alternative also falls back to
// `Default`, and its warning mentions "alternative".
#[test]
fn bridge_warns_on_unknown_method_in_alternative() {
use rustledger_ops::enrichment::CategorizationMethod;
let mut enr = enrichment_wrapper("rule", None);
enr.alternatives = vec![AlternativeWrapper {
account: "Expenses:Other".to_string(),
confidence: 0.3,
method: "future-method".to_string(),
}];
let out = EnrichedImporterOutput {
entries: vec![(open_wrapper("Assets:Bank"), enr)],
warnings: vec![],
errors: vec![],
};
let bridged = bridge_enriched_output(out).expect("bridge succeeds");
let alt = &bridged.entries[0].1.alternatives[0];
assert_eq!(alt.method, CategorizationMethod::Default);
assert_eq!(bridged.warnings.len(), 1);
assert!(bridged.warnings[0].contains("future-method"));
assert!(
bridged.warnings[0].contains("alternative"),
"warning should distinguish the alternative slot: {}",
bridged.warnings[0]
);
}
// Pins the warning order: bridge warnings, then guest warnings, then
// formatted guest errors.
#[test]
fn bridge_warning_ordering_is_bridge_then_output_warnings_then_errors() {
let out = EnrichedImporterOutput {
entries: vec![(
open_wrapper("Assets:Bank"),
enrichment_wrapper("nonsense", None),
)],
warnings: vec!["informational warning".to_string()],
errors: vec![PluginError::error("structured error").at("foo.csv", 7)],
};
let bridged = bridge_enriched_output(out).expect("bridge succeeds");
assert_eq!(bridged.warnings.len(), 3);
assert!(
bridged.warnings[0].contains("nonsense"),
"first: bridge warning, got {}",
bridged.warnings[0]
);
assert_eq!(
bridged.warnings[1], "informational warning",
"second: output.warnings forwarded verbatim"
);
assert_eq!(
bridged.warnings[2], "error foo.csv:7: structured error",
"third: output.errors via format_plugin_error"
);
}
// The plain conversion forwards guest warnings first, then each structured
// error rendered with its severity prefix.
#[test]
fn output_to_import_result_uses_severity_aware_formatter() {
let out = ImporterOutput {
directives: vec![],
warnings: vec!["plain warning".to_string()],
errors: vec![
PluginError::error("bad row").at("foo.csv", 42),
PluginError::warning("weird value"),
],
};
let result = output_to_import_result(out).expect("succeeds");
assert_eq!(
result.warnings,
vec![
"plain warning".to_string(),
"error foo.csv:42: bad row".to_string(),
"warning: weird value".to_string(),
]
);
}
// `load_embedded` stores the diagnostic name as the path and applies the
// default runtime config.
#[test]
fn load_embedded_uses_name_as_path_and_default_config() {
let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
let importer =
WasmImporter::load_embedded("inline-test", &bytes).expect("embedded load succeeds");
assert_eq!(importer.path(), Path::new("inline-test"));
assert_eq!(
importer.runtime_config().max_memory,
sandbox::DEFAULT_SANDBOX_MAX_MEMORY
);
assert_eq!(
importer.runtime_config().max_time_secs,
sandbox::DEFAULT_SANDBOX_MAX_TIME_SECS
);
assert_eq!(importer.name(), "tst");
}
// `runtime_config` returns exactly the config the importer was loaded with.
#[test]
fn runtime_config_returns_the_loaded_config() {
let custom = WasmRuntimeConfig {
max_memory: 128 * 1024 * 1024,
max_time_secs: 60,
};
let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
let importer = WasmImporter::load_from_bytes(PathBuf::from("custom.wasm"), &bytes, custom)
.expect("custom-config load succeeds");
assert_eq!(importer.runtime_config().max_memory, custom.max_memory);
assert_eq!(
importer.runtime_config().max_time_secs,
custom.max_time_secs
);
}
// The `Debug` output names the type, path, and guest name, and does not
// contain an expanded `Module { ... }`.
#[test]
fn debug_impl_does_not_panic_and_redacts_wasmtime_types() {
let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
let importer = WasmImporter::load_embedded("dbg-test", &bytes).expect("load succeeds");
let s = format!("{importer:?}");
assert!(s.contains("WasmImporter"));
assert!(s.contains("dbg-test"));
assert!(s.contains("tst")); assert!(
!s.contains("Module {"),
"Debug should not expand the wasmtime Module: {s}"
);
}
}