1use std::path::{Path, PathBuf};
37use std::sync::Arc;
38
39use rustledger_ops::fingerprint::Fingerprint;
40use rustledger_plugin::sandbox::{self, StoreState};
41use rustledger_plugin_types::{
42 EnrichedImporterOutput, IdentifyInput, IdentifyOutput, ImporterInput, ImporterOutput,
43 MetadataOutput, PluginError, PluginErrorSeverity,
44};
45use serde::{Serialize, de::DeserializeOwned};
46use wasmtime::{Engine, Linker, Module, Store};
47
48use crate::config::{CsvConfig, ImporterType};
49use crate::{EnrichedImportResult, ImportResult, Importer, ImporterConfig};
50
/// Largest output buffer (64 MiB) the host will copy out of guest memory.
/// A guest claiming a longer output is rejected with
/// [`WasmImporterError::OutputTooLarge`] before any buffer is allocated.
const MAX_OUTPUT_BYTES: usize = 64 * 1024 * 1024;

/// Largest serialized input (64 MiB) the host will hand to a guest.
/// Longer inputs are rejected with [`WasmImporterError::InputTooLarge`].
const MAX_INPUT_BYTES: usize = 64 * 1024 * 1024;
70
/// Resource limits applied to every sandboxed store a WASM importer runs in.
///
/// Both values are forwarded unchanged to `sandbox::make_sandboxed_store`
/// each time the importer module is instantiated.
#[derive(Debug, Clone, Copy)]
pub struct WasmRuntimeConfig {
    /// Memory cap handed to the sandbox, in bytes (the tests in this file
    /// build caps such as `64 * 1024 * 1024` and call them "64 MiB").
    pub max_memory: usize,
    /// Execution-time budget handed to the sandbox, in seconds as the name
    /// indicates. NOTE(review): the enforcement mechanism lives in `sandbox`,
    /// not in this file — confirm there before relying on exact semantics.
    pub max_time_secs: u64,
}
80
81impl Default for WasmRuntimeConfig {
82 fn default() -> Self {
83 Self {
84 max_memory: sandbox::DEFAULT_SANDBOX_MAX_MEMORY,
89 max_time_secs: sandbox::DEFAULT_SANDBOX_MAX_TIME_SECS,
90 }
91 }
92}
93
/// Everything that can go wrong while loading or calling a WASM importer.
#[derive(Debug, thiserror::Error)]
pub enum WasmImporterError {
    /// Reading a file from disk failed (the `.wasm` module itself, or the
    /// input file passed to `extract` / `extract_enriched`).
    #[error("failed to read WASM file {path}: {source}")]
    Io {
        /// Path that could not be read.
        path: PathBuf,
        /// Underlying I/O error.
        source: std::io::Error,
    },
    /// Enumerating an entry of an importer directory failed.
    /// Not constructed in the part of the file visible here.
    #[error("failed to enumerate entry in WASM importer directory {dir}: {source}")]
    DirEntry {
        /// Directory being enumerated.
        dir: PathBuf,
        /// Underlying I/O error.
        source: std::io::Error,
    },
    /// `wasmtime` could not compile the module bytes.
    #[error("failed to compile WASM module {path}: {source}")]
    Compile {
        /// Path (or diagnostic name) of the module.
        path: PathBuf,
        /// Compilation error reported by the engine.
        source: anyhow::Error,
    },
    /// The module declares an import; importers may not import anything.
    #[error(
        "WASM importer has forbidden import {module}::{name} — importers must be self-contained"
    )]
    ForbiddenImport {
        /// Module namespace of the first offending import.
        module: String,
        /// Item name of the first offending import.
        name: String,
    },
    /// A required export (named by the payload) is absent from the module.
    #[error("WASM importer missing required export `{0}`")]
    MissingExport(&'static str),
    /// Store creation, instantiation, a guest call, or a guest-memory access
    /// failed at runtime.
    #[error("WASM importer runtime error: {0}")]
    Runtime(#[source] anyhow::Error),
    /// The guest's output bytes were not valid MessagePack for the expected type.
    #[error("WASM importer returned malformed MessagePack: {0}")]
    Decode(#[source] rmp_serde::decode::Error),
    /// The host-side input could not be serialized to MessagePack.
    #[error("failed to encode input for WASM importer: {0}")]
    Encode(#[source] rmp_serde::encode::Error),
    /// The guest reported an output longer than `MAX_OUTPUT_BYTES`.
    #[error("WASM importer returned output of {len} bytes, exceeds cap of {max} bytes")]
    OutputTooLarge {
        /// Length the guest claimed.
        len: usize,
        /// The cap that was exceeded.
        max: usize,
    },
    /// The serialized input is longer than `MAX_INPUT_BYTES`.
    #[error("input of {len} bytes exceeds cap of {max} bytes for WASM importer")]
    InputTooLarge {
        /// Length of the serialized input.
        len: usize,
        /// The cap that was exceeded.
        max: usize,
    },
    /// An export exists but does not have the function signature the host expects.
    #[error("WASM importer export `{name}` has wrong signature: {source}")]
    ExportSignatureMismatch {
        /// Name of the mistyped export.
        name: &'static str,
        /// Type-check error reported by `wasmtime`.
        source: anyhow::Error,
    },
    /// The ABI handshake reported `AbiCheck::Missing`.
    #[error(
        "WASM importer has a missing or invalid `{export}` export (expected signature \
         `() -> u32`): it was built against an incompatible rustledger-plugin-types, or the \
         export is absent, mistyped, or traps. Host requires ABI v{expected}. Rebuild against \
         a matching rustledger-plugin-types."
    )]
    AbiVersionMissing {
        /// Name of the ABI-version export the host looked for.
        export: &'static str,
        /// ABI version the host requires.
        expected: u32,
    },
    /// The guest declared an ABI version different from the host's.
    #[error(
        "WASM importer ABI version mismatch: importer declares v{found}, host requires \
         v{expected}. Rebuild against a matching rustledger-plugin-types."
    )]
    AbiVersionMismatch {
        /// Version the guest declared.
        found: u32,
        /// Version the host requires.
        expected: u32,
    },
}
215
216#[inline]
229fn runtime_err(e: wasmtime::Error) -> WasmImporterError {
230 WasmImporterError::Runtime(anyhow::Error::from(e))
231}
232
/// An importer backed by a compiled WASM module.
///
/// The compiled module and engine are held behind `Arc`, so cloning an
/// importer does not recompile anything.
#[derive(Clone)]
pub struct WasmImporter {
    /// Path (or diagnostic name, for embedded modules) the module came from.
    path: PathBuf,
    /// Importer name, taken from the guest's `metadata` export at load time.
    name: String,
    /// Importer description, taken from the guest's `metadata` export at load time.
    description: String,
    /// The compiled module; instantiated afresh for every call.
    module: Arc<Module>,
    /// Engine the module was compiled with (obtained from `sandbox::shared_engine`).
    engine: Arc<Engine>,
    /// Resource limits applied to every store created for this importer.
    config: WasmRuntimeConfig,
}
256
257impl std::fmt::Debug for WasmImporter {
258 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
262 f.debug_struct("WasmImporter")
263 .field("path", &self.path)
264 .field("name", &self.name)
265 .field("description", &self.description)
266 .field("config", &self.config)
267 .finish_non_exhaustive()
268 }
269}
270
271impl WasmImporter {
272 pub fn load(path: impl Into<PathBuf>) -> Result<Self, WasmImporterError> {
275 Self::load_with_config(path, WasmRuntimeConfig::default())
276 }
277
278 pub fn load_with_config(
280 path: impl Into<PathBuf>,
281 config: WasmRuntimeConfig,
282 ) -> Result<Self, WasmImporterError> {
283 let path = path.into();
284 let bytes = std::fs::read(&path).map_err(|source| WasmImporterError::Io {
285 path: path.clone(),
286 source,
287 })?;
288 Self::load_from_bytes(path, &bytes, config)
289 }
290
291 pub fn load_embedded(
297 name_for_diagnostics: &str,
298 bytes: &[u8],
299 ) -> Result<Self, WasmImporterError> {
300 Self::load_from_bytes(
301 PathBuf::from(name_for_diagnostics),
302 bytes,
303 WasmRuntimeConfig::default(),
304 )
305 }
306
307 pub fn load_from_bytes(
312 path: impl Into<PathBuf>,
313 bytes: &[u8],
314 config: WasmRuntimeConfig,
315 ) -> Result<Self, WasmImporterError> {
316 let path = path.into();
317
318 let engine = sandbox::shared_engine();
323
324 let module = Module::new(&engine, bytes).map_err(|e| WasmImporterError::Compile {
325 path: path.clone(),
326 source: anyhow::Error::from(e),
327 })?;
328
329 Self::validate_module(&module)?;
330
331 let module = Arc::new(module);
332
333 let metadata = call_metadata(&engine, &module, config)?;
337
338 Ok(Self {
339 path,
340 name: metadata.name,
341 description: metadata.description,
342 module,
343 engine,
344 config,
345 })
346 }
347
348 #[must_use]
351 pub fn path(&self) -> &Path {
352 &self.path
353 }
354
355 #[must_use]
361 pub const fn runtime_config(&self) -> WasmRuntimeConfig {
362 self.config
363 }
364
365 fn validate_module(module: &Module) -> Result<(), WasmImporterError> {
367 if let Some(import) = module.imports().next() {
368 return Err(WasmImporterError::ForbiddenImport {
369 module: import.module().to_string(),
370 name: import.name().to_string(),
371 });
372 }
373
374 let exports: Vec<_> = module.exports().map(|e| e.name().to_string()).collect();
375 for required in &[
376 "memory",
377 "alloc",
378 "metadata",
379 "identify",
380 "extract",
381 "extract_enriched",
382 ] {
383 if !exports.iter().any(|n| n == required) {
384 return Err(WasmImporterError::MissingExport(required));
385 }
386 }
387 Ok(())
388 }
389
390 fn call_msgpack<I: Serialize, O: DeserializeOwned>(
395 &self,
396 entry: &'static str,
397 input: &I,
398 ) -> Result<O, WasmImporterError> {
399 call_msgpack_with(&self.engine, &self.module, self.config, entry, input)
400 }
401}
402
403const fn validate_input_size(len: usize) -> Result<u32, WasmImporterError> {
408 if len > MAX_INPUT_BYTES {
409 return Err(WasmImporterError::InputTooLarge {
410 len,
411 max: MAX_INPUT_BYTES,
412 });
413 }
414 Ok(len as u32)
416}
417
418fn read_packed_output(
425 store: &Store<StoreState>,
426 memory: &wasmtime::Memory,
427 packed: u64,
428) -> Result<Vec<u8>, WasmImporterError> {
429 let out_ptr = (packed >> 32) as u32;
430 let out_len = (packed & 0xFFFF_FFFF) as u32 as usize;
431 if out_len > MAX_OUTPUT_BYTES {
432 return Err(WasmImporterError::OutputTooLarge {
433 len: out_len,
434 max: MAX_OUTPUT_BYTES,
435 });
436 }
437 let mut out_bytes = vec![0u8; out_len];
438 memory
439 .read(store, out_ptr as usize, &mut out_bytes)
440 .map_err(|e| WasmImporterError::Runtime(e.into()))?;
441 Ok(out_bytes)
442}
443
444fn call_msgpack_with<I: Serialize, O: DeserializeOwned>(
448 engine: &Engine,
449 module: &Module,
450 config: WasmRuntimeConfig,
451 entry: &'static str,
452 input: &I,
453) -> Result<O, WasmImporterError> {
454 let input_bytes = rmp_serde::to_vec(input).map_err(WasmImporterError::Encode)?;
455 let input_len = validate_input_size(input_bytes.len())?;
456
457 let mut store = sandbox::make_sandboxed_store(engine, config.max_memory, config.max_time_secs)
458 .map_err(runtime_err)?;
459
460 let linker = Linker::new(engine);
462 let instance = linker
463 .instantiate(&mut store, module)
464 .map_err(runtime_err)?;
465
466 let memory = instance
477 .get_memory(&mut store, "memory")
478 .expect("validate_module verified `memory` export at load");
479
480 let alloc = instance
481 .get_typed_func::<u32, u32>(&mut store, "alloc")
482 .map_err(|e| WasmImporterError::ExportSignatureMismatch {
483 name: "alloc",
484 source: anyhow::Error::from(e),
485 })?;
486
487 let input_ptr = alloc.call(&mut store, input_len).map_err(runtime_err)?;
488 memory
489 .write(&mut store, input_ptr as usize, &input_bytes)
490 .map_err(|e| WasmImporterError::Runtime(e.into()))?;
491
492 let func = instance
493 .get_typed_func::<(u32, u32), u64>(&mut store, entry)
494 .map_err(|e| WasmImporterError::ExportSignatureMismatch {
495 name: entry,
496 source: anyhow::Error::from(e),
497 })?;
498
499 let packed = func
500 .call(&mut store, (input_ptr, input_len))
501 .map_err(runtime_err)?;
502
503 let out_bytes = read_packed_output(&store, &memory, packed)?;
504 rmp_serde::from_slice(&out_bytes).map_err(WasmImporterError::Decode)
505}
506
507fn call_metadata(
511 engine: &Engine,
512 module: &Module,
513 config: WasmRuntimeConfig,
514) -> Result<MetadataOutput, WasmImporterError> {
515 let mut store = sandbox::make_sandboxed_store(engine, config.max_memory, config.max_time_secs)
516 .map_err(runtime_err)?;
517
518 let linker = Linker::new(engine);
519 let instance = linker
520 .instantiate(&mut store, module)
521 .map_err(runtime_err)?;
522
523 match sandbox::check_guest_abi(&instance, &mut store) {
531 sandbox::AbiCheck::Match => {}
532 sandbox::AbiCheck::Missing => {
533 return Err(WasmImporterError::AbiVersionMissing {
534 export: rustledger_plugin_types::ABI_VERSION_EXPORT,
535 expected: sandbox::HOST_ABI_VERSION,
536 });
537 }
538 sandbox::AbiCheck::Mismatch { found } => {
539 return Err(WasmImporterError::AbiVersionMismatch {
540 found,
541 expected: sandbox::HOST_ABI_VERSION,
542 });
543 }
544 }
545
546 let memory = instance
548 .get_memory(&mut store, "memory")
549 .expect("validate_module verified `memory` export at load");
550
551 let metadata = instance
554 .get_typed_func::<(), u64>(&mut store, "metadata")
555 .map_err(|e| WasmImporterError::ExportSignatureMismatch {
556 name: "metadata",
557 source: anyhow::Error::from(e),
558 })?;
559
560 let packed = metadata.call(&mut store, ()).map_err(runtime_err)?;
561 let out_bytes = read_packed_output(&store, &memory, packed)?;
562 rmp_serde::from_slice(&out_bytes).map_err(WasmImporterError::Decode)
563}
564
565fn build_wasm_input(path: &Path, content: Vec<u8>, config: &ImporterConfig) -> ImporterInput {
571 let mut options = std::collections::HashMap::new();
572 let ImporterType::Csv(csv) = &config.importer_type;
573 project_csv_config_into_options(csv, &mut options);
574 ImporterInput {
575 path: path.to_string_lossy().into_owned(),
576 content,
577 account: config.account.clone(),
578 currency: config.currency.clone(),
579 options,
580 }
581}
582
583fn project_csv_config_into_options(
608 csv: &CsvConfig,
609 options: &mut std::collections::HashMap<String, String>,
610) {
611 options.insert("date_format".to_string(), csv.date_format.clone());
612 options.insert("delimiter".to_string(), csv.delimiter.to_string());
613 options.insert("has_header".to_string(), csv.has_header.to_string());
614 options.insert("skip_rows".to_string(), csv.skip_rows.to_string());
615 options.insert("invert_sign".to_string(), csv.invert_sign.to_string());
616 options.insert(
617 "skip_zero_amounts".to_string(),
618 csv.skip_zero_amounts.to_string(),
619 );
620 if let Some(de) = &csv.default_expense {
621 options.insert("default_expense".to_string(), de.clone());
622 }
623 if let Some(di) = &csv.default_income {
624 options.insert("default_income".to_string(), di.clone());
625 }
626}
627
628fn format_plugin_error(e: &PluginError) -> String {
638 let severity = match e.severity {
639 PluginErrorSeverity::Error => "error",
640 PluginErrorSeverity::Warning => "warning",
641 };
642 let location = match (&e.source_file, e.line_number) {
643 (Some(f), Some(n)) => format!(" {f}:{n}"),
644 (Some(f), None) => format!(" {f}"),
645 (None, Some(n)) => format!(" line {n}"),
646 (None, None) => String::new(),
647 };
648 format!("{severity}{location}: {}", e.message)
649}
650
651fn output_to_import_result(out: ImporterOutput) -> anyhow::Result<ImportResult> {
669 let mut directives = Vec::with_capacity(out.directives.len());
670 for w in out.directives {
671 let d = rustledger_plugin::convert::wrapper_to_directive(&w)
672 .map_err(|e| anyhow::anyhow!("WASM importer returned invalid directive: {e:?}"))?;
673 directives.push(d);
674 }
675 let mut result = ImportResult::new(directives);
676 for w in out.warnings {
677 result = result.with_warning(w);
678 }
679 for e in &out.errors {
685 result = result.with_warning(format_plugin_error(e));
686 }
687 Ok(result)
688}
689
690impl Importer for WasmImporter {
691 fn name(&self) -> &str {
692 &self.name
693 }
694
695 fn description(&self) -> &str {
696 &self.description
697 }
698
699 fn identify(&self, path: &Path) -> bool {
700 let input = IdentifyInput {
701 path: path.to_string_lossy().into_owned(),
702 };
703 match self.call_msgpack::<_, IdentifyOutput>("identify", &input) {
710 Ok(out) => out.matches,
711 Err(e) => {
712 eprintln!(
713 "warning: WASM importer `{}` identify({}) failed: {e}",
714 self.name,
715 path.display()
716 );
717 false
718 }
719 }
720 }
721
722 fn extract(&self, path: &Path, config: &ImporterConfig) -> anyhow::Result<ImportResult> {
723 let content = std::fs::read(path).map_err(|source| WasmImporterError::Io {
728 path: path.to_path_buf(),
729 source,
730 })?;
731 let input = build_wasm_input(path, content, config);
732 let output: ImporterOutput = self.call_msgpack("extract", &input)?;
733 output_to_import_result(output)
734 }
735
736 fn extract_enriched(
737 &self,
738 path: &Path,
739 config: &ImporterConfig,
740 ) -> anyhow::Result<EnrichedImportResult> {
741 let content = std::fs::read(path).map_err(|source| WasmImporterError::Io {
742 path: path.to_path_buf(),
743 source,
744 })?;
745 let input = build_wasm_input(path, content, config);
746 let output: EnrichedImporterOutput = self.call_msgpack("extract_enriched", &input)?;
747 bridge_enriched_output(output)
748 }
749}
750
751fn bridge_enriched_output(output: EnrichedImporterOutput) -> anyhow::Result<EnrichedImportResult> {
769 let mut entries = Vec::with_capacity(output.entries.len());
770 let mut bridge_warnings: Vec<String> = Vec::new();
771 for (wrapper, enr) in output.entries {
772 let dir = rustledger_plugin::convert::wrapper_to_directive(&wrapper)
773 .map_err(|e| anyhow::anyhow!("WASM importer returned invalid directive: {e:?}"))?;
774 let method = parse_method(&enr.method).unwrap_or_else(|unknown| {
775 bridge_warnings.push(format!(
776 "warning: WASM importer used unknown categorization method `{unknown}`, falling back to Default"
777 ));
778 rustledger_ops::enrichment::CategorizationMethod::Default
779 });
780 let alternatives = enr
781 .alternatives
782 .into_iter()
783 .map(|a| {
784 let alt_method = parse_method(&a.method).unwrap_or_else(|unknown| {
785 bridge_warnings.push(format!(
786 "warning: WASM importer used unknown categorization method `{unknown}` in alternative, falling back to Default"
787 ));
788 rustledger_ops::enrichment::CategorizationMethod::Default
789 });
790 rustledger_ops::enrichment::Alternative {
791 account: a.account,
792 confidence: a.confidence,
793 method: alt_method,
794 }
795 })
796 .collect();
797 let fingerprint = match enr.fingerprint {
798 Some(hex) => match Fingerprint::from_hex(&hex) {
799 Ok(fp) => Some(fp),
800 Err(e) => {
801 bridge_warnings.push(format!(
802 "warning: WASM importer returned malformed fingerprint hex `{hex}`: {e}"
803 ));
804 None
805 }
806 },
807 None => None,
808 };
809 let enrichment = rustledger_ops::enrichment::Enrichment {
810 directive_index: enr.directive_index,
811 confidence: enr.confidence,
812 method,
813 alternatives,
814 fingerprint,
815 };
816 entries.push((dir, enrichment));
817 }
818 let mut enriched = EnrichedImportResult::new(entries);
819 for w in bridge_warnings {
820 enriched = enriched.with_warning(w);
821 }
822 for w in output.warnings {
823 enriched = enriched.with_warning(w);
824 }
825 for e in &output.errors {
826 enriched = enriched.with_warning(format_plugin_error(e));
827 }
828 Ok(enriched)
829}
830
831fn parse_method(s: &str) -> Result<rustledger_ops::enrichment::CategorizationMethod, &str> {
841 use rustledger_ops::enrichment::CategorizationMethod;
842 match s {
843 "rule" => Ok(CategorizationMethod::Rule),
844 "merchant-dict" => Ok(CategorizationMethod::MerchantDict),
845 "ml" => Ok(CategorizationMethod::Ml),
846 "llm" => Ok(CategorizationMethod::Llm),
847 "manual" => Ok(CategorizationMethod::Manual),
848 "default" => Ok(CategorizationMethod::Default),
849 unknown => Err(unknown),
850 }
851}
852
853#[cfg(test)]
854mod tests {
855 use super::*;
856
857 #[test]
858 fn wasm_runtime_config_default_is_sensible() {
859 let c = WasmRuntimeConfig::default();
860 assert_eq!(c.max_memory, sandbox::DEFAULT_SANDBOX_MAX_MEMORY);
865 assert_eq!(c.max_time_secs, sandbox::DEFAULT_SANDBOX_MAX_TIME_SECS);
866 }
867
    /// A module that declares any import (here `env::ext`) is rejected with
    /// `ForbiddenImport`, even though every required export is present.
    #[test]
    fn validate_module_rejects_module_with_imports() {
        let wat = r#"
            (module
                (import "env" "ext" (func $ext))
                (memory (export "memory") 1)
                (func (export "alloc") (param i32) (result i32) i32.const 0)
                (func (export "metadata") (result i64) i64.const 0)
                (func (export "identify") (param i32 i32) (result i64) i64.const 0)
                (func (export "extract") (param i32 i32) (result i64) i64.const 0)
                (func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
            )
        "#;
        let bytes = wat::parse_str(wat).expect("WAT parses");
        let engine = sandbox::shared_engine();
        let module = Module::new(&engine, &bytes).unwrap();
        let err = WasmImporter::validate_module(&module).unwrap_err();
        assert!(matches!(err, WasmImporterError::ForbiddenImport { .. }));
    }
888
    /// A module lacking `identify`, `extract` and `extract_enriched` is
    /// rejected with `MissingExport`.
    #[test]
    fn validate_module_rejects_missing_export() {
        let wat = r#"
            (module
                (memory (export "memory") 1)
                (func (export "alloc") (param i32) (result i32) i32.const 0)
                (func (export "metadata") (result i64) i64.const 0)
            )
        "#;
        let bytes = wat::parse_str(wat).expect("WAT parses");
        let engine = sandbox::shared_engine();
        let module = Module::new(&engine, &bytes).unwrap();
        let err = WasmImporter::validate_module(&module).unwrap_err();
        assert!(matches!(err, WasmImporterError::MissingExport(_)));
    }
905
906 #[test]
907 fn parse_method_round_trips_known_values() {
908 use rustledger_ops::enrichment::CategorizationMethod;
909 assert!(matches!(
910 parse_method("rule"),
911 Ok(CategorizationMethod::Rule)
912 ));
913 assert!(matches!(
914 parse_method("merchant-dict"),
915 Ok(CategorizationMethod::MerchantDict)
916 ));
917 assert!(matches!(parse_method("ml"), Ok(CategorizationMethod::Ml)));
918 assert!(matches!(parse_method("llm"), Ok(CategorizationMethod::Llm)));
919 assert!(matches!(
920 parse_method("manual"),
921 Ok(CategorizationMethod::Manual)
922 ));
923 assert!(matches!(
924 parse_method("default"),
925 Ok(CategorizationMethod::Default)
926 ));
927 }
928
929 #[test]
930 fn parse_method_round_trips_via_as_meta_value() {
931 use rustledger_ops::enrichment::CategorizationMethod;
935 for m in [
936 CategorizationMethod::Rule,
937 CategorizationMethod::MerchantDict,
938 CategorizationMethod::Ml,
939 CategorizationMethod::Llm,
940 CategorizationMethod::Manual,
941 CategorizationMethod::Default,
942 ] {
943 let s = m.as_meta_value();
944 let parsed = parse_method(s)
945 .unwrap_or_else(|u| panic!("as_meta_value `{u}` not handled by parse_method"));
946 assert_eq!(parsed, m, "round-trip failed for {m:?}");
947 }
948 }
949
950 #[test]
951 fn parse_method_unknown_surfaces_the_unknown_string() {
952 assert_eq!(parse_method("future-method"), Err("future-method"));
958 assert_eq!(parse_method("merchant_dict"), Err("merchant_dict"));
959 assert_eq!(parse_method(""), Err(""));
960 }
961
962 #[test]
963 fn format_plugin_error_with_full_location() {
964 let e = PluginError::error("bad row").at("foo.csv", 42);
965 assert_eq!(format_plugin_error(&e), "error foo.csv:42: bad row");
966 }
967
968 #[test]
969 fn format_plugin_error_warning_severity() {
970 let e = PluginError::warning("weird value").at("foo.csv", 42);
971 assert_eq!(format_plugin_error(&e), "warning foo.csv:42: weird value");
972 }
973
974 #[test]
975 fn format_plugin_error_no_location_no_orphan_colon() {
976 let e = PluginError::error("parser bug");
977 assert_eq!(format_plugin_error(&e), "error: parser bug");
979 }
980
981 #[test]
982 fn format_plugin_error_file_only() {
983 let e = PluginError::warning("weird value");
984 let e = PluginError {
985 source_file: Some("foo.csv".to_string()),
986 ..e
987 };
988 assert_eq!(format_plugin_error(&e), "warning foo.csv: weird value");
989 }
990
991 #[test]
992 fn format_plugin_error_line_only_uses_human_phrasing() {
993 let e = PluginError::warning("weird");
995 let e = PluginError {
996 line_number: Some(42),
997 ..e
998 };
999 assert_eq!(format_plugin_error(&e), "warning line 42: weird");
1000 }
1001
    /// WAT source for a minimal but complete importer used by the
    /// end-to-end tests.
    ///
    /// Each entry point ignores its input and returns a packed
    /// `(ptr << 32) | len` pointing at a pre-baked MessagePack blob placed in
    /// the data segments; `alloc` is a simple bump allocator starting at
    /// offset 1024.
    fn roundtrip_wat() -> &'static str {
        r#"
        (module
            (memory (export "memory") 1)

            ;; MetadataOutput { name: "tst", description: "tst" }
            ;; 0x92 fixarray-2, 0xa3 fixstr-3 "tst", 0xa3 fixstr-3 "tst"
            (data (i32.const 0) "\92\a3tst\a3tst")

            ;; IdentifyOutput { matches: true }
            ;; 0x91 fixarray-1, 0xc3 true
            (data (i32.const 16) "\91\c3")

            ;; ImporterOutput { directives: [], warnings: [], errors: [] }
            ;; 0x93 fixarray-3, then three 0x90 fixarray-0
            (data (i32.const 24) "\93\90\90\90")

            ;; EnrichedImporterOutput { entries: [], warnings: [], errors: [] }
            (data (i32.const 32) "\93\90\90\90")

            ;; bump allocator: hand out at $bump, advance by $size.
            ;; NOTE: real importers MUST bounds-check $bump+$size
            ;; against current memory and call `memory.grow` (subject
            ;; to MemoryLimiter approval). This test fixture skips
            ;; that — inputs in the test are small and we declare 1
            ;; full page (64 KiB), so the bump never crosses the
            ;; boundary.
            (global $bump (mut i32) (i32.const 1024))
            (func (export "alloc") (param $size i32) (result i32)
                (local $ret i32)
                global.get $bump
                local.set $ret
                global.get $bump
                local.get $size
                i32.add
                global.set $bump
                local.get $ret)

            ;; metadata: ptr=0, len=9 → (0<<32) | 9 = 9
            (func (export "metadata") (result i64)
                i64.const 9)

            ;; identify: ptr=16, len=2 → (16<<32) | 2
            (func (export "identify") (param i32 i32) (result i64)
                i64.const 0x10_0000_0002)

            ;; extract: ptr=24, len=4 → (24<<32) | 4
            (func (export "extract") (param i32 i32) (result i64)
                i64.const 0x18_0000_0004)

            ;; extract_enriched: ptr=32, len=4 → (32<<32) | 4
            (func (export "extract_enriched") (param i32 i32) (result i64)
                i64.const 0x20_0000_0004)

            ;; ABI handshake export. Must equal sandbox::HOST_ABI_VERSION
            ;; (rustledger_plugin_types::ABI_VERSION = 1). If the ABI
            ;; version is ever bumped, this literal moves in lockstep —
            ;; the deliberate test update that proves a real guest would
            ;; need rebuilding too.
            (func (export "__rustledger_abi_version") (result i32)
                i32.const 1)
        )
        "#
    }
1074
1075 fn minimal_config() -> ImporterConfig {
1076 ImporterConfig {
1077 account: "Assets:Bank:Checking".to_string(),
1078 currency: Some("USD".to_string()),
1079 importer_type: ImporterType::Csv(CsvConfig::default()),
1080 }
1081 }
1082
    /// Builds WAT for a stub importer that has every required export, with
    /// `abi_section` spliced in verbatim as the final module item.
    ///
    /// Pass an empty string to omit the ABI-version export, or a `(func ...)`
    /// form to supply one.
    fn importer_wat_with_abi(abi_section: &str) -> String {
        format!(
            r#"
            (module
                (memory (export "memory") 1)
                (func (export "alloc") (param i32) (result i32) i32.const 0)
                (func (export "metadata") (result i64) i64.const 0)
                (func (export "identify") (param i32 i32) (result i64) i64.const 0)
                (func (export "extract") (param i32 i32) (result i64) i64.const 0)
                (func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
                {abi_section}
            )
            "#
        )
    }
1105
1106 #[test]
1110 fn load_rejects_importer_missing_abi_version() {
1111 let bytes = wat::parse_str(importer_wat_with_abi("")).expect("WAT parses");
1112 let err = WasmImporter::load_from_bytes(
1113 PathBuf::from("noabi.wasm"),
1114 &bytes,
1115 WasmRuntimeConfig::default(),
1116 )
1117 .expect_err("load must reject an importer with no ABI export");
1118 assert!(
1119 matches!(err, WasmImporterError::AbiVersionMissing { .. }),
1120 "expected AbiVersionMissing, got: {err:?}"
1121 );
1122 }
1123
1124 #[test]
1127 fn load_rejects_importer_with_mismatched_abi_version() {
1128 let wat = importer_wat_with_abi(
1130 r#"(func (export "__rustledger_abi_version") (result i32) i32.const 999)"#,
1131 );
1132 let bytes = wat::parse_str(wat).expect("WAT parses");
1133 let err = WasmImporter::load_from_bytes(
1134 PathBuf::from("badabi.wasm"),
1135 &bytes,
1136 WasmRuntimeConfig::default(),
1137 )
1138 .expect_err("load must reject an ABI-mismatched importer");
1139 match err {
1140 WasmImporterError::AbiVersionMismatch { found, expected } => {
1141 assert_eq!(found, 999);
1142 assert_eq!(expected, sandbox::HOST_ABI_VERSION);
1143 }
1144 other => panic!("expected AbiVersionMismatch, got: {other:?}"),
1145 }
1146 }
1147
1148 #[test]
1149 fn end_to_end_wat_module_round_trips_all_entry_points() {
1150 let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
1151 let importer = WasmImporter::load_from_bytes(
1152 PathBuf::from("test.wasm"),
1153 &bytes,
1154 WasmRuntimeConfig::default(),
1155 )
1156 .expect("module loads + metadata round-trips");
1157
1158 assert_eq!(importer.name(), "tst");
1162 assert_eq!(importer.description(), "tst");
1163
1164 assert!(importer.identify(Path::new("anything.csv")));
1166
1167 let tmp = tempfile::NamedTempFile::new().expect("tempfile");
1169 let config = minimal_config();
1170
1171 let result = importer
1172 .extract(tmp.path(), &config)
1173 .expect("extract round-trip");
1174 assert!(result.directives.is_empty());
1175 assert!(result.warnings.is_empty());
1176
1177 let enriched = importer
1178 .extract_enriched(tmp.path(), &config)
1179 .expect("extract_enriched round-trip");
1180 assert!(enriched.entries.is_empty());
1181 assert!(enriched.warnings.is_empty());
1182 }
1183
    /// A guest whose `metadata` claims a `u32::MAX`-byte output is rejected
    /// at load with `OutputTooLarge`, reporting the claimed length and the cap.
    #[test]
    fn oversized_output_is_rejected_before_allocation() {
        let wat = r#"
            (module
                (memory (export "memory") 1)
                (func (export "alloc") (param i32) (result i32) i32.const 0)
                ;; metadata: ptr=0, len=u32::MAX
                (func (export "metadata") (result i64)
                    i64.const 0x0000_0000_ffff_ffff)
                (func (export "identify") (param i32 i32) (result i64) i64.const 0)
                (func (export "extract") (param i32 i32) (result i64) i64.const 0)
                (func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
                ;; ABI handshake passes so the oversized-metadata check
                ;; downstream is what rejects this module (issue #1234).
                (func (export "__rustledger_abi_version") (result i32) i32.const 1)
            )
        "#;
        let bytes = wat::parse_str(wat).expect("WAT parses");
        // `let ... else` rather than `expect_err`: only the error is of interest.
        let Err(err) = WasmImporter::load_from_bytes(
            PathBuf::from("oversized.wasm"),
            &bytes,
            WasmRuntimeConfig::default(),
        ) else {
            panic!("oversized metadata output should have been rejected at load");
        };
        assert!(
            matches!(
                err,
                WasmImporterError::OutputTooLarge { len, max }
                    if len == u32::MAX as usize && max == MAX_OUTPUT_BYTES
            ),
            "expected OutputTooLarge, got {err:?}"
        );
    }
1224
1225 #[test]
1233 fn zero_max_time_secs_does_not_starve_fuel() {
1234 let config = WasmRuntimeConfig {
1239 max_memory: sandbox::DEFAULT_SANDBOX_MAX_MEMORY,
1240 max_time_secs: 0,
1241 };
1242 let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
1243 let importer = WasmImporter::load_from_bytes(PathBuf::from("test.wasm"), &bytes, config)
1246 .expect("zero max_time_secs is clamped, not starved");
1247 assert_eq!(importer.name(), "tst");
1248 }
1249
1250 #[test]
1251 fn validate_input_size_accepts_at_cap_and_rejects_above() {
1252 assert_eq!(
1254 validate_input_size(MAX_INPUT_BYTES).unwrap(),
1255 MAX_INPUT_BYTES as u32
1256 );
1257 let err = validate_input_size(MAX_INPUT_BYTES + 1).unwrap_err();
1260 assert!(
1261 matches!(
1262 err,
1263 WasmImporterError::InputTooLarge { len, max }
1264 if len == MAX_INPUT_BYTES + 1 && max == MAX_INPUT_BYTES
1265 ),
1266 "got: {err:?}"
1267 );
1268 }
1269
1270 #[test]
1271 fn fuel_calc_saturates_instead_of_overflowing() {
1272 let bytes = wat::parse_str(roundtrip_wat()).expect("WAT parses");
1277 let config = WasmRuntimeConfig {
1278 max_memory: sandbox::DEFAULT_SANDBOX_MAX_MEMORY,
1279 max_time_secs: u64::MAX,
1280 };
1281 let importer = WasmImporter::load_from_bytes(PathBuf::from("test.wasm"), &bytes, config)
1284 .expect("u64::MAX max_time_secs saturates, doesn't overflow");
1285 assert_eq!(importer.name(), "tst");
1286 }
1287
    /// A `metadata` export typed `() -> i32` instead of `() -> i64` is
    /// reported as `ExportSignatureMismatch` naming `metadata`.
    #[test]
    fn wrong_signature_export_surfaces_export_signature_mismatch() {
        let wat = r#"
            (module
                (memory (export "memory") 1)
                (func (export "alloc") (param i32) (result i32) i32.const 0)
                ;; WRONG: should be (result i64), declared as (result i32)
                (func (export "metadata") (result i32) i32.const 0)
                (func (export "identify") (param i32 i32) (result i64) i64.const 0)
                (func (export "extract") (param i32 i32) (result i64) i64.const 0)
                (func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
                ;; Correct ABI so the check passes and the metadata
                ;; signature mismatch is what surfaces (issue #1234).
                (func (export "__rustledger_abi_version") (result i32) i32.const 1)
            )
        "#;
        let bytes = wat::parse_str(wat).expect("WAT parses");
        let Err(err) = WasmImporter::load_from_bytes(
            PathBuf::from("badsig.wasm"),
            &bytes,
            WasmRuntimeConfig::default(),
        ) else {
            panic!("metadata with wrong signature should be rejected");
        };
        assert!(
            matches!(
                err,
                WasmImporterError::ExportSignatureMismatch {
                    name: "metadata",
                    ..
                }
            ),
            "expected ExportSignatureMismatch for metadata, got {err:?}"
        );
    }
1332
    /// A module declaring 5000 initial pages (5000 x 64 KiB, roughly 312 MiB)
    /// fails to load under a 64 MiB cap and surfaces as `Runtime`.
    ///
    /// The fixture has no ABI export: instantiation happens before the ABI
    /// check, so the failure under test is reached first.
    #[test]
    fn initial_memory_above_cap_is_rejected_via_limiter_wiring() {
        let wat = r#"
            (module
                (memory (export "memory") 5000)
                (func (export "alloc") (param i32) (result i32) i32.const 0)
                (func (export "metadata") (result i64) i64.const 0)
                (func (export "identify") (param i32 i32) (result i64) i64.const 0)
                (func (export "extract") (param i32 i32) (result i64) i64.const 0)
                (func (export "extract_enriched") (param i32 i32) (result i64) i64.const 0)
            )
        "#;
        let bytes = wat::parse_str(wat).expect("WAT parses");
        let config = WasmRuntimeConfig {
            max_memory: 64 * 1024 * 1024,
            max_time_secs: 30,
        };
        let Err(err) = WasmImporter::load_from_bytes(PathBuf::from("bigmem.wasm"), &bytes, config)
        else {
            panic!("module declaring 320 MiB initial memory should be rejected with 64 MiB cap");
        };
        assert!(
            matches!(err, WasmImporterError::Runtime(_)),
            "expected Runtime (instantiate failed via limiter), got {err:?}"
        );
    }
1368
1369 use rustledger_plugin_types::{
1377 AlternativeWrapper, DirectiveData, DirectiveWrapper, EnrichmentWrapper, OpenData,
1378 };
1379
1380 fn open_wrapper(account: &str) -> DirectiveWrapper {
1381 DirectiveWrapper {
1382 directive_type: String::new(),
1383 date: "2024-01-01".to_string(),
1384 filename: None,
1385 lineno: None,
1386 data: DirectiveData::Open(OpenData {
1387 account: account.to_string(),
1388 currencies: vec![],
1389 booking: None,
1390 metadata: vec![],
1391 }),
1392 }
1393 }
1394
1395 fn enrichment_wrapper(method: &str, fingerprint: Option<String>) -> EnrichmentWrapper {
1396 EnrichmentWrapper {
1397 directive_index: 0,
1398 confidence: 1.0,
1399 method: method.to_string(),
1400 alternatives: vec![],
1401 fingerprint,
1402 }
1403 }
1404
/// A fingerprint serialised with `to_hex` must come back out of
/// `bridge_enriched_output` as the same `Fingerprint`, with no warnings.
#[test]
fn bridge_round_trips_valid_fingerprint_hex() {
    let fp = Fingerprint::compute("2024-01-01", Some("100"), "coffee");
    let entry = (
        open_wrapper("Assets:Bank"),
        enrichment_wrapper("rule", Some(fp.to_hex())),
    );
    let out = EnrichedImporterOutput {
        entries: vec![entry],
        warnings: Vec::new(),
        errors: Vec::new(),
    };

    let bridged = bridge_enriched_output(out).expect("bridge succeeds");

    assert_eq!(bridged.entries.len(), 1);
    let (_, enrichment) = &bridged.entries[0];
    assert_eq!(
        enrichment.fingerprint,
        Some(fp),
        "fingerprint should round-trip"
    );
    assert!(bridged.warnings.is_empty(), "no warnings expected");
}
1426
/// A fingerprint string that is not valid hex must not fail the bridge: the
/// entry is kept, its fingerprint becomes `None`, and exactly one warning that
/// quotes the offending string is emitted.
#[test]
fn bridge_warns_on_malformed_fingerprint_hex_and_drops_to_none() {
    let bad_fingerprint = Some("not-a-valid-hex".to_string());
    let entry = (
        open_wrapper("Assets:Bank"),
        enrichment_wrapper("rule", bad_fingerprint),
    );
    let out = EnrichedImporterOutput {
        entries: vec![entry],
        warnings: Vec::new(),
        errors: Vec::new(),
    };

    let bridged = bridge_enriched_output(out).expect("bridge succeeds");

    assert_eq!(bridged.entries.len(), 1);
    assert_eq!(bridged.entries[0].1.fingerprint, None);
    assert_eq!(bridged.warnings.len(), 1);
    // Only index into the warnings once the length check above has passed.
    let warning = &bridged.warnings[0];
    assert!(
        warning.contains("not-a-valid-hex"),
        "warning should name the bad hex: {}",
        warning
    );
}
1449
/// A method string the bridge does not recognise (`"merchant_dict"` here) is
/// expected to map to `CategorizationMethod::Default` and to produce exactly
/// one warning that mentions the unknown string.
#[test]
fn bridge_warns_on_unknown_method_and_falls_back_to_default() {
    use rustledger_ops::enrichment::CategorizationMethod;

    let entry = (
        open_wrapper("Assets:Bank"),
        enrichment_wrapper("merchant_dict", None),
    );
    let out = EnrichedImporterOutput {
        entries: vec![entry],
        warnings: Vec::new(),
        errors: Vec::new(),
    };

    let bridged = bridge_enriched_output(out).expect("bridge succeeds");

    let (_, enrichment) = &bridged.entries[0];
    assert_eq!(enrichment.method, CategorizationMethod::Default);
    assert_eq!(bridged.warnings.len(), 1);
    let warning = &bridged.warnings[0];
    assert!(
        warning.contains("merchant_dict"),
        "warning should name the unknown method: {}",
        warning
    );
}
1470
/// An unknown method string inside an *alternative* (rather than the primary
/// enrichment) is expected to fall back to `CategorizationMethod::Default` and
/// to produce a single warning that names both the bad method and the fact
/// that it came from an alternative.
#[test]
fn bridge_warns_on_unknown_method_in_alternative() {
    use rustledger_ops::enrichment::CategorizationMethod;

    let unknown_alternative = AlternativeWrapper {
        account: "Expenses:Other".to_string(),
        confidence: 0.3,
        method: "future-method".to_string(),
    };
    // The primary method ("rule") is valid; only the alternative is unknown.
    let enr = EnrichmentWrapper {
        alternatives: vec![unknown_alternative],
        ..enrichment_wrapper("rule", None)
    };
    let out = EnrichedImporterOutput {
        entries: vec![(open_wrapper("Assets:Bank"), enr)],
        warnings: Vec::new(),
        errors: Vec::new(),
    };

    let bridged = bridge_enriched_output(out).expect("bridge succeeds");

    assert_eq!(
        bridged.entries[0].1.alternatives[0].method,
        CategorizationMethod::Default
    );
    assert_eq!(bridged.warnings.len(), 1);
    assert!(bridged.warnings[0].contains("future-method"));
    assert!(
        bridged.warnings[0].contains("alternative"),
        "warning should distinguish the alternative slot: {}",
        bridged.warnings[0]
    );
}
1496
/// Pins the order of the merged warning list returned by
/// `bridge_enriched_output`: first the warnings the bridge itself generates
/// (here, for the unknown method `"nonsense"`), then the importer's own
/// `warnings` verbatim, then the importer's structured `errors` rendered as
/// strings.
#[test]
fn bridge_warning_ordering_is_bridge_then_output_warnings_then_errors() {
    // One source for each of the three warning categories.
    let entries = vec![(
        open_wrapper("Assets:Bank"),
        enrichment_wrapper("nonsense", None),
    )];
    let warnings = vec!["informational warning".to_string()];
    let errors = vec![PluginError::error("structured error").at("foo.csv", 7)];
    let out = EnrichedImporterOutput {
        entries,
        warnings,
        errors,
    };

    let bridged = bridge_enriched_output(out).expect("bridge succeeds");

    assert_eq!(bridged.warnings.len(), 3);
    let first = &bridged.warnings[0];
    assert!(
        first.contains("nonsense"),
        "first: bridge warning, got {}",
        first
    );
    assert_eq!(
        bridged.warnings[1], "informational warning",
        "second: output.warnings forwarded verbatim"
    );
    assert_eq!(
        bridged.warnings[2], "error foo.csv:7: structured error",
        "third: output.errors via format_plugin_error"
    );
}
1526
/// `output_to_import_result` is expected to append the importer's structured
/// errors to its plain warnings, rendering each with its severity and, when
/// present, its `file:line` location.
#[test]
fn output_to_import_result_uses_severity_aware_formatter() {
    let located_error = PluginError::error("bad row").at("foo.csv", 42);
    let bare_warning = PluginError::warning("weird value");
    let out = ImporterOutput {
        directives: Vec::new(),
        warnings: vec!["plain warning".to_string()],
        errors: vec![located_error, bare_warning],
    };

    let result = output_to_import_result(out).expect("succeeds");

    // Plain warnings come first, then the errors in their original order.
    let expected: Vec<String> = Vec::from(
        [
            "plain warning",
            "error foo.csv:42: bad row",
            "warning: weird value",
        ]
        .map(String::from),
    );
    assert_eq!(result.warnings, expected);
}
1551
/// `load_embedded` is expected to record the supplied name as the importer's
/// path and to run with the sandbox default memory and time limits.
#[test]
fn load_embedded_uses_name_as_path_and_default_config() {
    let wasm = wat::parse_str(roundtrip_wat()).expect("WAT parses");
    let loaded = WasmImporter::load_embedded("inline-test", &wasm);
    let importer = loaded.expect("embedded load succeeds");

    assert_eq!(importer.path(), Path::new("inline-test"));

    // Both limits should match the shared sandbox defaults.
    let limits = importer.runtime_config();
    assert_eq!(limits.max_memory, sandbox::DEFAULT_SANDBOX_MAX_MEMORY);
    assert_eq!(limits.max_time_secs, sandbox::DEFAULT_SANDBOX_MAX_TIME_SECS);

    // NOTE(review): "tst" is presumably the name the `roundtrip_wat` fixture
    // reports; confirm against that fixture.
    assert_eq!(importer.name(), "tst");
}
1574
/// `runtime_config()` must report the exact limits that were passed to
/// `load_from_bytes`, not the defaults.
#[test]
fn runtime_config_returns_the_loaded_config() {
    // Values chosen to differ from the 64 MiB / 30 s used elsewhere in these tests.
    let custom = WasmRuntimeConfig {
        max_memory: 128 * 1024 * 1024,
        max_time_secs: 60,
    };
    let wasm = wat::parse_str(roundtrip_wat()).expect("WAT parses");
    let loaded = WasmImporter::load_from_bytes(PathBuf::from("custom.wasm"), &wasm, custom);
    let importer = loaded.expect("custom-config load succeeds");

    let reported = importer.runtime_config();
    assert_eq!(reported.max_memory, custom.max_memory);
    assert_eq!(reported.max_time_secs, custom.max_time_secs);
}
1590
/// Formatting a `WasmImporter` with `{:?}` must succeed, mention the type name,
/// the load path and the importer name, and must not dump the inner wasmtime
/// `Module`.
#[test]
fn debug_impl_does_not_panic_and_redacts_wasmtime_types() {
    let wasm = wat::parse_str(roundtrip_wat()).expect("WAT parses");
    let loaded = WasmImporter::load_embedded("dbg-test", &wasm);
    let importer = loaded.expect("load succeeds");

    let s = format!("{importer:?}");

    // Type name, then the path given to `load_embedded`, then the importer name.
    assert!(s.contains("WasmImporter"));
    assert!(s.contains("dbg-test"));
    assert!(s.contains("tst"));
    // A derived Debug for a struct named `Module` would print "Module {".
    assert!(
        !s.contains("Module {"),
        "Debug should not expand the wasmtime Module: {s}"
    );
}
1606}