Skip to main content

tensogram_wasm/
lib.rs

1// (C) Copyright 2026- ECMWF and individual contributors.
2//
3// This software is licensed under the terms of the Apache Licence Version 2.0
4// which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5// In applying this licence, ECMWF does not waive the privileges and immunities
6// granted to it by virtue of its status as an intergovernmental organisation nor
7// does it submit to any jurisdiction.
8
9//! WebAssembly bindings for the Tensogram N-tensor message format.
10//!
11//! Provides encode, decode, scan, streaming decode, range decode, hash,
12//! validation, pre-encoded encode, `simple_packing` params, and a
13//! frame-at-a-time `StreamingEncoder` — accessible from JavaScript /
14//! TypeScript via `wasm-bindgen`.
15//!
16//! Tensor payloads are returned as zero-copy TypedArray views into
17//! WASM linear memory for 60fps visualisation performance.
18//!
19//! # Compressor support
20//!
21//! This WASM build supports lz4, szip (pure-Rust), and zstd (pure-Rust).
22//! Attempts to decode blosc2/zfp/sz3 compressed data will return an error.
23
24mod convert;
25mod encoder;
26mod extras;
27mod layout;
28mod remote_scan;
29mod streaming;
30
31use convert::*;
32use tensogram::{self as core, DecodeOptions};
33use wasm_bindgen::prelude::*;
34
35// ── Decode API ───────────────────────────────────────────────────────────────
36
37/// Decode all objects from a complete Tensogram message.
38///
39/// Returns a `DecodedMessage` handle that owns the decoded data.
40/// Use `.object_data_f32(i)` etc. to get zero-copy TypedArray views
41/// into the decoded payloads.
42///
43/// @param buf - Raw .tgm message bytes
44/// @param restore_non_finite - When true (default), decode writes canonical
45///                             NaN / +Inf / -Inf at positions recorded in
46///                             the frame's mask companion.  Set to false to
47///                             receive 0.0-substituted bytes as on disk.
48/// @param verify_hash - When true, verify each data-object frame's
49///                      inline xxh3 hash against the recomputed
50///                      digest.  Default false (opt-in).
51///                      Integrity failures are returned as a
52///                      `JsValue` carrying a thrown `js_sys::Error`
53///                      with structured properties attached: `name`
54///                      is `"MissingHashError"` (when the per-frame
55///                      `HASH_PRESENT` flag is clear) or
56///                      `"HashMismatchError"` (when the slot
57///                      disagrees), and `objectIndex` /
58///                      `expected` / `actual` carry the structured
59///                      payload that the TS wrapper routes to
60///                      dedicated error classes.  See
61///                      `plans/DESIGN.md` §"Integrity Hashing".
62#[wasm_bindgen]
63pub fn decode(
64    buf: &[u8],
65    restore_non_finite: Option<bool>,
66    verify_hash: Option<bool>,
67) -> Result<DecodedMessage, JsValue> {
68    let options = DecodeOptions {
69        restore_non_finite: restore_non_finite.unwrap_or(true),
70        verify_hash: verify_hash.unwrap_or(false),
71        ..Default::default()
72    };
73    let (metadata, objects) = core::decode(buf, &options).map_err(js_err)?;
74    Ok(DecodedMessage { metadata, objects })
75}
76
77/// Decode only the global metadata from a message (no payload decoding).
78///
79/// @param buf - Raw .tgm message bytes
80/// @returns Plain JS object with version (synthesised from the
81///   preamble), base, _reserved_, _extra_ fields
82#[wasm_bindgen]
83pub fn decode_metadata(buf: &[u8]) -> Result<JsValue, JsValue> {
84    let meta = core::decode_metadata(buf).map_err(js_err)?;
85    metadata_to_js(&meta)
86}
87
88/// Decode a single object by index.
89///
90/// @param buf - Raw .tgm message bytes
91/// @param index - Zero-based object index
92/// @param restore_non_finite - Restore canonical NaN / Inf from mask companion (default: true)
93/// @param verify_hash - Per-frame hash verification (default false).
94///                      See `decode` for the full contract.
95#[wasm_bindgen]
96pub fn decode_object(
97    buf: &[u8],
98    index: usize,
99    restore_non_finite: Option<bool>,
100    verify_hash: Option<bool>,
101) -> Result<DecodedMessage, JsValue> {
102    let options = DecodeOptions {
103        restore_non_finite: restore_non_finite.unwrap_or(true),
104        verify_hash: verify_hash.unwrap_or(false),
105        ..Default::default()
106    };
107    let (metadata, descriptor, data) = core::decode_object(buf, index, &options).map_err(js_err)?;
108    Ok(DecodedMessage {
109        metadata,
110        objects: vec![(descriptor, data)],
111    })
112}
113
114/// Scan a buffer for concatenated Tensogram messages.
115///
116/// Returns an array of `[offset, length]` pairs for each message found.
117///
118/// @param buf - Buffer potentially containing multiple .tgm messages
119/// @returns Array of [offset, length] pairs
120#[wasm_bindgen]
121pub fn scan(buf: &[u8]) -> Result<JsValue, JsValue> {
122    let positions = core::scan(buf);
123    to_js(&positions)
124}
125
126// ── Encode API ───────────────────────────────────────────────────────────────
127
128/// Encode objects into a Tensogram message.
129///
130/// @param metadata_js - GlobalMetadata as a plain JS object
131/// @param objects_js - Array of {descriptor, data} objects where data is a TypedArray
132/// @param hash - Whether to compute integrity hashes (default: true)
133/// @param allow_nan - When true, substitute NaN with 0 and record
134///                    positions in a mask companion frame (default: false)
135/// @param allow_inf - When true, substitute +Inf / -Inf with 0 and
136///                    record positions in per-sign masks (default: false)
137/// @param nan_mask_method - Mask compression method for the NaN mask
138/// @param pos_inf_mask_method - Mask compression method for the +Inf mask
139/// @param neg_inf_mask_method - Mask compression method for the -Inf mask
140/// @param small_mask_threshold_bytes - Mask size below which method="none" is forced (default: 128)
141/// @returns Uint8Array containing the encoded .tgm message
142#[wasm_bindgen]
143#[allow(clippy::too_many_arguments)]
144pub fn encode(
145    metadata_js: JsValue,
146    objects_js: js_sys::Array,
147    hash: Option<bool>,
148    allow_nan: Option<bool>,
149    allow_inf: Option<bool>,
150    nan_mask_method: Option<String>,
151    pos_inf_mask_method: Option<String>,
152    neg_inf_mask_method: Option<String>,
153    small_mask_threshold_bytes: Option<usize>,
154) -> Result<js_sys::Uint8Array, JsValue> {
155    let metadata = metadata_from_js(&metadata_js)?;
156    let (descriptors, data_vec) = extract_descriptor_data_pairs(&objects_js)?;
157    let pairs: Vec<(&core::DataObjectDescriptor, &[u8])> = descriptors
158        .iter()
159        .zip(data_vec.iter())
160        .map(|(d, v)| (d, v.as_slice()))
161        .collect();
162    let options = build_encode_options_full(
163        hash,
164        allow_nan,
165        allow_inf,
166        nan_mask_method.as_deref(),
167        pos_inf_mask_method.as_deref(),
168        neg_inf_mask_method.as_deref(),
169        small_mask_threshold_bytes,
170    )?;
171    let encoded = core::encode(&metadata, &pairs, &options).map_err(js_err)?;
172    // Return a JS-owned copy.  We must not use `view_as_u8` here because
173    // `encoded` is a local Vec that will be dropped when this function
174    // returns — a view into it would be a dangling pointer.
175    Ok(js_sys::Uint8Array::from(encoded.as_slice()))
176}
177
178// ── DecodedMessage handle ────────────────────────────────────────────────────
179
180/// Handle to a decoded Tensogram message.
181///
182/// Owns the decoded payload data in WASM linear memory.  Use the
183/// `object_data_*` methods to get zero-copy TypedArray views.
184///
185/// **Important**: The returned TypedArray views are invalidated if WASM
186/// memory grows.  Read or copy the data before further WASM calls.
187/// Call `.free()` when done to release WASM memory.
188#[wasm_bindgen]
189pub struct DecodedMessage {
190    metadata: core::GlobalMetadata,
191    objects: Vec<core::DecodedObject>,
192}
193
194#[wasm_bindgen]
195impl DecodedMessage {
196    /// Global metadata as a plain JS object.  The wire-format
197    /// `version` is synthesised from the preamble (v3: always `3`)
198    /// for TypeScript ergonomics — see `metadata_to_js` in
199    /// `convert.rs`.
200    pub fn metadata(&self) -> Result<JsValue, JsValue> {
201        metadata_to_js(&self.metadata)
202    }
203
204    /// Number of data objects in the message.
205    pub fn object_count(&self) -> usize {
206        self.objects.len()
207    }
208
209    /// Object descriptor (shape, dtype, encoding, etc.) as a JS object.
210    pub fn object_descriptor(&self, index: usize) -> Result<JsValue, JsValue> {
211        // Reuse payload() for the bounds check so the error message is consistent.
212        let _ = self.payload(index)?;
213        to_js(&self.objects[index].0)
214    }
215
216    // ── Zero-copy TypedArray views ───────────────────────────────────────
217
218    /// Zero-copy Float32Array view into the decoded payload.
219    ///
220    /// **Warning**: This view points directly into WASM linear memory.
221    /// It becomes invalid if WASM memory grows.  Read the data or pass
222    /// it to WebGL before any further WASM calls.
223    pub fn object_data_f32(&self, index: usize) -> Result<js_sys::Float32Array, JsValue> {
224        let data = self.payload(index)?;
225        view_as_f32(data)
226    }
227
228    /// Zero-copy Float64Array view.
229    pub fn object_data_f64(&self, index: usize) -> Result<js_sys::Float64Array, JsValue> {
230        let data = self.payload(index)?;
231        view_as_f64(data)
232    }
233
234    /// Zero-copy Int32Array view.
235    pub fn object_data_i32(&self, index: usize) -> Result<js_sys::Int32Array, JsValue> {
236        let data = self.payload(index)?;
237        view_as_i32(data)
238    }
239
240    /// Zero-copy Uint8Array view.
241    pub fn object_data_u8(&self, index: usize) -> Result<js_sys::Uint8Array, JsValue> {
242        let data = self.payload(index)?;
243        Ok(view_as_u8(data))
244    }
245
246    // ── Safe-copy variants ───────────────────────────────────────────────
247
248    /// Safe-copy Float32Array (JS-heap owned, survives WASM memory growth).
249    pub fn object_data_copy_f32(&self, index: usize) -> Result<js_sys::Float32Array, JsValue> {
250        let data = self.payload(index)?;
251        copy_as_f32(data)
252    }
253
254    /// Raw payload byte length for object at `index`.
255    pub fn object_byte_length(&self, index: usize) -> Result<usize, JsValue> {
256        Ok(self.payload(index)?.len())
257    }
258}
259
260impl DecodedMessage {
261    fn payload(&self, index: usize) -> Result<&[u8], JsValue> {
262        if index >= self.objects.len() {
263            return Err(JsValue::from(js_sys::Error::new(&format!(
264                "object index {index} out of range (have {})",
265                self.objects.len()
266            ))));
267        }
268        Ok(&self.objects[index].1)
269    }
270
271    /// Build a handle owning exactly one decoded object and an empty
272    /// `GlobalMetadata`.  Used by `layout::decode_object_from_frame`
273    /// when the caller has fetched a single frame over HTTP Range and
274    /// will get its metadata separately (from the cached layout).
275    pub(crate) fn from_single_object(
276        descriptor: core::DataObjectDescriptor,
277        data: Vec<u8>,
278    ) -> Self {
279        Self {
280            metadata: core::GlobalMetadata::default(),
281            objects: vec![(descriptor, data)],
282        }
283    }
284}
285
286// ── StreamingDecoder re-export ───────────────────────────────────────────────
287
288pub use streaming::StreamingDecoder;
289
290// ── StreamingEncoder re-export ───────────────────────────────────────────────
291
292pub use encoder::StreamingEncoder;
293
294// ── Layout helpers (preamble, postamble, header/footer, single-frame) ───────
295
296pub use layout::{
297    decode_object_from_frame, decode_range_from_frame, parse_descriptor_cbor, parse_footer_chunk,
298    parse_header_chunk, read_data_object_frame_footer, read_data_object_frame_header,
299    read_postamble_info, read_preamble_info,
300};
301
302pub use remote_scan::{
303    parse_backward_postamble_outcome, parse_forward_preamble_outcome, same_message_check,
304    validate_backward_preamble_outcome,
305};
306
307// ── Scope-C exports (decode_range, compute_hash, validate, …) ───────────────
308
309pub use extras::{
310    compute_hash, decode_range, encode_pre_encoded, simple_packing_compute_params, validate_buffer,
311};
312
313// ── Doctor: environment diagnostics ──────────────────────────────────────────
314
315/// Collect environment diagnostics: build metadata, compiled-in feature
316/// states, and core encode/decode self-test results.
317///
318/// Mirrors the Rust `tensogram::doctor::run_diagnostics()` and the
319/// `tensogram doctor` CLI subcommand, returning a plain JS object whose
320/// shape matches the JSON schema documented in
321/// [`docs/src/cli/doctor.md`](https://sites.ecmwf.int/docs/tensogram/main/cli/doctor.html).
322///
323/// The WASM build does **not** run the GRIB or NetCDF converter
324/// self-tests — those features are CLI-only — so the `self_test` array
325/// covers only the core encode/decode pipeline plus the codecs that
326/// were compiled into this WASM bundle (typically `lz4`, `szip-pure`,
327/// and the `none` round-trip).
328///
329/// # Example
330///
331/// ```typescript
332/// import init, { doctor } from "@ecmwf.int/tensogram";
333/// await init();
334/// const report = doctor();
335/// console.log(report.build.version, report.build.target);
336/// for (const f of report.features) {
337///     console.log(f.name, f.state);
338/// }
339/// ```
340#[wasm_bindgen]
341pub fn doctor() -> Result<JsValue, JsValue> {
342    let report = tensogram::doctor::run_diagnostics();
343    convert::to_js(&report)
344}