tensogram_wasm/lib.rs
1// (C) Copyright 2026- ECMWF and individual contributors.
2//
3// This software is licensed under the terms of the Apache Licence Version 2.0
4// which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5// In applying this licence, ECMWF does not waive the privileges and immunities
6// granted to it by virtue of its status as an intergovernmental organisation nor
7// does it submit to any jurisdiction.
8
9//! WebAssembly bindings for the Tensogram N-tensor message format.
10//!
11//! Provides encode, decode, scan, streaming decode, range decode, hash,
12//! validation, pre-encoded encode, `simple_packing` params, and a
13//! frame-at-a-time `StreamingEncoder` — accessible from JavaScript /
14//! TypeScript via `wasm-bindgen`.
15//!
16//! Tensor payloads are returned as zero-copy TypedArray views into
17//! WASM linear memory for 60fps visualisation performance.
18//!
19//! # Compressor support
20//!
21//! This WASM build supports lz4, szip (pure-Rust), and zstd (pure-Rust).
22//! Attempts to decode blosc2/zfp/sz3 compressed data will return an error.
23
24mod convert;
25mod encoder;
26mod extras;
27mod layout;
28mod remote_scan;
29mod streaming;
30
31use convert::*;
32use tensogram::{self as core, DecodeOptions};
33use wasm_bindgen::prelude::*;
34
35// ── Decode API ───────────────────────────────────────────────────────────────
36
37/// Decode all objects from a complete Tensogram message.
38///
39/// Returns a `DecodedMessage` handle that owns the decoded data.
40/// Use `.object_data_f32(i)` etc. to get zero-copy TypedArray views
41/// into the decoded payloads.
42///
43/// @param buf - Raw .tgm message bytes
44/// @param restore_non_finite - When true (default), decode writes canonical
45/// NaN / +Inf / -Inf at positions recorded in
46/// the frame's mask companion. Set to false to
47/// receive 0.0-substituted bytes as on disk.
48/// @param verify_hash - When true, verify each data-object frame's
49/// inline xxh3 hash against the recomputed
50/// digest. Default false (opt-in).
51/// Integrity failures are returned as a
52/// `JsValue` carrying a thrown `js_sys::Error`
53/// with structured properties attached: `name`
54/// is `"MissingHashError"` (when the per-frame
55/// `HASH_PRESENT` flag is clear) or
56/// `"HashMismatchError"` (when the slot
57/// disagrees), and `objectIndex` /
58/// `expected` / `actual` carry the structured
59/// payload that the TS wrapper routes to
60/// dedicated error classes. See
61/// `plans/DESIGN.md` §"Integrity Hashing".
62#[wasm_bindgen]
63pub fn decode(
64 buf: &[u8],
65 restore_non_finite: Option<bool>,
66 verify_hash: Option<bool>,
67) -> Result<DecodedMessage, JsValue> {
68 let options = DecodeOptions {
69 restore_non_finite: restore_non_finite.unwrap_or(true),
70 verify_hash: verify_hash.unwrap_or(false),
71 ..Default::default()
72 };
73 let (metadata, objects) = core::decode(buf, &options).map_err(js_err)?;
74 Ok(DecodedMessage { metadata, objects })
75}
76
77/// Decode only the global metadata from a message (no payload decoding).
78///
79/// @param buf - Raw .tgm message bytes
80/// @returns Plain JS object with version (synthesised from the
81/// preamble), base, _reserved_, _extra_ fields
82#[wasm_bindgen]
83pub fn decode_metadata(buf: &[u8]) -> Result<JsValue, JsValue> {
84 let meta = core::decode_metadata(buf).map_err(js_err)?;
85 metadata_to_js(&meta)
86}
87
88/// Decode a single object by index.
89///
90/// @param buf - Raw .tgm message bytes
91/// @param index - Zero-based object index
92/// @param restore_non_finite - Restore canonical NaN / Inf from mask companion (default: true)
93/// @param verify_hash - Per-frame hash verification (default false).
94/// See `decode` for the full contract.
95#[wasm_bindgen]
96pub fn decode_object(
97 buf: &[u8],
98 index: usize,
99 restore_non_finite: Option<bool>,
100 verify_hash: Option<bool>,
101) -> Result<DecodedMessage, JsValue> {
102 let options = DecodeOptions {
103 restore_non_finite: restore_non_finite.unwrap_or(true),
104 verify_hash: verify_hash.unwrap_or(false),
105 ..Default::default()
106 };
107 let (metadata, descriptor, data) = core::decode_object(buf, index, &options).map_err(js_err)?;
108 Ok(DecodedMessage {
109 metadata,
110 objects: vec![(descriptor, data)],
111 })
112}
113
114/// Scan a buffer for concatenated Tensogram messages.
115///
116/// Returns an array of `[offset, length]` pairs for each message found.
117///
118/// @param buf - Buffer potentially containing multiple .tgm messages
119/// @returns Array of [offset, length] pairs
120#[wasm_bindgen]
121pub fn scan(buf: &[u8]) -> Result<JsValue, JsValue> {
122 let positions = core::scan(buf);
123 to_js(&positions)
124}
125
126// ── Encode API ───────────────────────────────────────────────────────────────
127
128/// Encode objects into a Tensogram message.
129///
130/// @param metadata_js - GlobalMetadata as a plain JS object
131/// @param objects_js - Array of {descriptor, data} objects where data is a TypedArray
132/// @param hash - Whether to compute integrity hashes (default: true)
133/// @param allow_nan - When true, substitute NaN with 0 and record
134/// positions in a mask companion frame (default: false)
135/// @param allow_inf - When true, substitute +Inf / -Inf with 0 and
136/// record positions in per-sign masks (default: false)
137/// @param nan_mask_method - Mask compression method for the NaN mask
138/// @param pos_inf_mask_method - Mask compression method for the +Inf mask
139/// @param neg_inf_mask_method - Mask compression method for the -Inf mask
140/// @param small_mask_threshold_bytes - Mask size below which method="none" is forced (default: 128)
141/// @returns Uint8Array containing the encoded .tgm message
142#[wasm_bindgen]
143#[allow(clippy::too_many_arguments)]
144pub fn encode(
145 metadata_js: JsValue,
146 objects_js: js_sys::Array,
147 hash: Option<bool>,
148 allow_nan: Option<bool>,
149 allow_inf: Option<bool>,
150 nan_mask_method: Option<String>,
151 pos_inf_mask_method: Option<String>,
152 neg_inf_mask_method: Option<String>,
153 small_mask_threshold_bytes: Option<usize>,
154) -> Result<js_sys::Uint8Array, JsValue> {
155 let metadata = metadata_from_js(&metadata_js)?;
156 let (descriptors, data_vec) = extract_descriptor_data_pairs(&objects_js)?;
157 let pairs: Vec<(&core::DataObjectDescriptor, &[u8])> = descriptors
158 .iter()
159 .zip(data_vec.iter())
160 .map(|(d, v)| (d, v.as_slice()))
161 .collect();
162 let options = build_encode_options_full(
163 hash,
164 allow_nan,
165 allow_inf,
166 nan_mask_method.as_deref(),
167 pos_inf_mask_method.as_deref(),
168 neg_inf_mask_method.as_deref(),
169 small_mask_threshold_bytes,
170 )?;
171 let encoded = core::encode(&metadata, &pairs, &options).map_err(js_err)?;
172 // Return a JS-owned copy. We must not use `view_as_u8` here because
173 // `encoded` is a local Vec that will be dropped when this function
174 // returns — a view into it would be a dangling pointer.
175 Ok(js_sys::Uint8Array::from(encoded.as_slice()))
176}
177
178// ── DecodedMessage handle ────────────────────────────────────────────────────
179
/// Handle to a decoded Tensogram message.
///
/// Owns the decoded payload data in WASM linear memory. Use the
/// `object_data_*` methods to get zero-copy TypedArray views, or
/// `object_data_copy_f32` for a JS-heap-owned copy.
///
/// **Important**: The returned zero-copy TypedArray views are invalidated
/// if WASM memory grows. Read or copy the data before further WASM calls.
/// Call `.free()` when done to release WASM memory.
#[wasm_bindgen]
pub struct DecodedMessage {
    // Global metadata of the message; exposed to JS through `metadata()`.
    metadata: core::GlobalMetadata,
    // One entry per decoded object: element `.0` is the object descriptor,
    // element `.1` is the decoded payload bytes.
    objects: Vec<core::DecodedObject>,
}
193
194#[wasm_bindgen]
195impl DecodedMessage {
196 /// Global metadata as a plain JS object. The wire-format
197 /// `version` is synthesised from the preamble (v3: always `3`)
198 /// for TypeScript ergonomics — see `metadata_to_js` in
199 /// `convert.rs`.
200 pub fn metadata(&self) -> Result<JsValue, JsValue> {
201 metadata_to_js(&self.metadata)
202 }
203
204 /// Number of data objects in the message.
205 pub fn object_count(&self) -> usize {
206 self.objects.len()
207 }
208
209 /// Object descriptor (shape, dtype, encoding, etc.) as a JS object.
210 pub fn object_descriptor(&self, index: usize) -> Result<JsValue, JsValue> {
211 // Reuse payload() for the bounds check so the error message is consistent.
212 let _ = self.payload(index)?;
213 to_js(&self.objects[index].0)
214 }
215
216 // ── Zero-copy TypedArray views ───────────────────────────────────────
217
218 /// Zero-copy Float32Array view into the decoded payload.
219 ///
220 /// **Warning**: This view points directly into WASM linear memory.
221 /// It becomes invalid if WASM memory grows. Read the data or pass
222 /// it to WebGL before any further WASM calls.
223 pub fn object_data_f32(&self, index: usize) -> Result<js_sys::Float32Array, JsValue> {
224 let data = self.payload(index)?;
225 view_as_f32(data)
226 }
227
228 /// Zero-copy Float64Array view.
229 pub fn object_data_f64(&self, index: usize) -> Result<js_sys::Float64Array, JsValue> {
230 let data = self.payload(index)?;
231 view_as_f64(data)
232 }
233
234 /// Zero-copy Int32Array view.
235 pub fn object_data_i32(&self, index: usize) -> Result<js_sys::Int32Array, JsValue> {
236 let data = self.payload(index)?;
237 view_as_i32(data)
238 }
239
240 /// Zero-copy Uint8Array view.
241 pub fn object_data_u8(&self, index: usize) -> Result<js_sys::Uint8Array, JsValue> {
242 let data = self.payload(index)?;
243 Ok(view_as_u8(data))
244 }
245
246 // ── Safe-copy variants ───────────────────────────────────────────────
247
248 /// Safe-copy Float32Array (JS-heap owned, survives WASM memory growth).
249 pub fn object_data_copy_f32(&self, index: usize) -> Result<js_sys::Float32Array, JsValue> {
250 let data = self.payload(index)?;
251 copy_as_f32(data)
252 }
253
254 /// Raw payload byte length for object at `index`.
255 pub fn object_byte_length(&self, index: usize) -> Result<usize, JsValue> {
256 Ok(self.payload(index)?.len())
257 }
258}
259
260impl DecodedMessage {
261 fn payload(&self, index: usize) -> Result<&[u8], JsValue> {
262 if index >= self.objects.len() {
263 return Err(JsValue::from(js_sys::Error::new(&format!(
264 "object index {index} out of range (have {})",
265 self.objects.len()
266 ))));
267 }
268 Ok(&self.objects[index].1)
269 }
270
271 /// Build a handle owning exactly one decoded object and an empty
272 /// `GlobalMetadata`. Used by `layout::decode_object_from_frame`
273 /// when the caller has fetched a single frame over HTTP Range and
274 /// will get its metadata separately (from the cached layout).
275 pub(crate) fn from_single_object(
276 descriptor: core::DataObjectDescriptor,
277 data: Vec<u8>,
278 ) -> Self {
279 Self {
280 metadata: core::GlobalMetadata::default(),
281 objects: vec![(descriptor, data)],
282 }
283 }
284}
285
286// ── StreamingDecoder re-export ───────────────────────────────────────────────
287
288pub use streaming::StreamingDecoder;
289
290// ── StreamingEncoder re-export ───────────────────────────────────────────────
291
292pub use encoder::StreamingEncoder;
293
294// ── Layout helpers (preamble, postamble, header/footer, single-frame) ───────
295
296pub use layout::{
297 decode_object_from_frame, decode_range_from_frame, parse_descriptor_cbor, parse_footer_chunk,
298 parse_header_chunk, read_data_object_frame_footer, read_data_object_frame_header,
299 read_postamble_info, read_preamble_info,
300};
301
302pub use remote_scan::{
303 parse_backward_postamble_outcome, parse_forward_preamble_outcome, same_message_check,
304 validate_backward_preamble_outcome,
305};
306
307// ── Scope-C exports (decode_range, compute_hash, validate, …) ───────────────
308
309pub use extras::{
310 compute_hash, decode_range, encode_pre_encoded, simple_packing_compute_params, validate_buffer,
311};
312
313// ── Doctor: environment diagnostics ──────────────────────────────────────────
314
315/// Collect environment diagnostics: build metadata, compiled-in feature
316/// states, and core encode/decode self-test results.
317///
318/// Mirrors the Rust `tensogram::doctor::run_diagnostics()` and the
319/// `tensogram doctor` CLI subcommand, returning a plain JS object whose
320/// shape matches the JSON schema documented in
321/// [`docs/src/cli/doctor.md`](https://sites.ecmwf.int/docs/tensogram/main/cli/doctor.html).
322///
323/// The WASM build does **not** run the GRIB or NetCDF converter
324/// self-tests — those features are CLI-only — so the `self_test` array
325/// covers only the core encode/decode pipeline plus the codecs that
326/// were compiled into this WASM bundle (typically `lz4`, `szip-pure`,
327/// and the `none` round-trip).
328///
329/// # Example
330///
331/// ```typescript
332/// import init, { doctor } from "@ecmwf.int/tensogram";
333/// await init();
334/// const report = doctor();
335/// console.log(report.build.version, report.build.target);
336/// for (const f of report.features) {
337/// console.log(f.name, f.state);
338/// }
339/// ```
340#[wasm_bindgen]
341pub fn doctor() -> Result<JsValue, JsValue> {
342 let report = tensogram::doctor::run_diagnostics();
343 convert::to_js(&report)
344}