Skip to main content

tensogram_wasm/
layout.rs

1// (C) Copyright 2026- ECMWF and individual contributors.
2//
3// This software is licensed under the terms of the Apache Licence Version 2.0
4// which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5// In applying this licence, ECMWF does not waive the privileges and immunities
6// granted to it by virtue of its status as an intergovernmental organisation nor
7// does it submit to any jurisdiction.
8
9//! Wire-format layout helpers exposed to the TypeScript wrapper.
10//!
11//! These are thin bindings over public functions in the `tensogram`
12//! core crate.  Together they give the TS wrapper enough primitives to
13//! implement lazy per-object HTTP Range reads without re-implementing
14//! any wire-format parsing:
15//!
16//! - [`read_preamble_info`], [`read_postamble_info`] let TS inspect
17//!   one message header/footer without decoding frames.
18//! - [`parse_header_chunk`], [`parse_footer_chunk`] accept a larger
19//!   byte region (header 256 KB chunk, or footer suffix) and return
20//!   the metadata + index frames found in it.
21//! - [`read_data_object_frame_header`], [`read_data_object_frame_footer`],
22//!   [`parse_descriptor_cbor`] let TS implement the CBOR-prefix-only
23//!   descriptor optimisation for huge frames.
24//! - [`decode_object_from_frame`], [`decode_range_from_frame`] decode
25//!   a single frame once TS has fetched its bytes via Range.
26//!
27//! Every error is routed through [`crate::convert::js_err`] so the
28//! TypeScript wrapper's error mapper sees a consistent shape.
29
30use crate::DecodedMessage;
31use crate::convert::{js_err, metadata_to_js, to_js};
32use serde::Serialize;
33use tensogram::{
34    self as core, DecodeOptions,
35    metadata::{cbor_to_global_metadata, cbor_to_index, cbor_to_object_descriptor},
36    wire::{
37        DATA_OBJECT_FOOTER_SIZE, DataObjectFlags, FRAME_COMMON_FOOTER_SIZE, FRAME_END,
38        FRAME_HEADER_SIZE, FrameHeader, FrameType, MAGIC, MessageFlags, POSTAMBLE_SIZE,
39        PREAMBLE_SIZE, Postamble, Preamble,
40    },
41};
42use wasm_bindgen::prelude::*;
43
44/// Set `(metadata, index)` on `out` with the metadata routed through
45/// [`metadata_to_js`] so the wire-format `version` field gets
46/// synthesised.  Without this the lazy-backend output would silently
47/// return `metadata.version === undefined` while the eager-backend
48/// [`crate::decode_metadata`] path returned `metadata.version ===
49/// WIRE_VERSION` — a real cross-backend divergence the lazy
50/// `messageMetadata` / `messageObject` tests rely on being absent.
51fn set_metadata_and_index(
52    out: &js_sys::Object,
53    metadata: Option<core::GlobalMetadata>,
54    index: Option<IndexFrameJs>,
55) -> Result<(), JsValue> {
56    let meta_val = match metadata {
57        Some(m) => metadata_to_js(&m)?,
58        None => JsValue::NULL,
59    };
60    js_sys::Reflect::set(out, &JsValue::from_str("metadata"), &meta_val)
61        .map_err(|_| JsValue::from(js_sys::Error::new("internal: failed to set chunk.metadata")))?;
62    let index_val = match index {
63        Some(idx) => to_js(&idx)?,
64        None => JsValue::NULL,
65    };
66    js_sys::Reflect::set(out, &JsValue::from_str("index"), &index_val)
67        .map_err(|_| JsValue::from(js_sys::Error::new("internal: failed to set chunk.index")))?;
68    Ok(())
69}
70
71// ── Return shapes ────────────────────────────────────────────────────────────
72
73#[derive(Serialize)]
74struct PreambleInfoJs {
75    version: u16,
76    flags: u16,
77    total_length: u64,
78    has_header_metadata: bool,
79    has_header_index: bool,
80    has_footer_metadata: bool,
81    has_footer_index: bool,
82    has_preceder_metadata: bool,
83    hashes_present: bool,
84}
85
86#[derive(Serialize)]
87struct PostambleInfoJs {
88    first_footer_offset: u64,
89    total_length: u64,
90    end_magic_ok: bool,
91}
92
93#[derive(Serialize)]
94struct IndexFrameJs {
95    offsets: Vec<u64>,
96    lengths: Vec<u64>,
97}
98
99#[derive(Serialize)]
100struct FrameHeaderJs {
101    frame_type: u16,
102    version: u16,
103    flags: u16,
104    total_length: u64,
105    is_data_object: bool,
106    cbor_after_payload: bool,
107}
108
109#[derive(Serialize)]
110struct DataObjectFooterJs {
111    cbor_offset: u64,
112    hash_hex: String,
113    end_magic_ok: bool,
114}
115
116// ── Preamble / postamble ─────────────────────────────────────────────────────
117
118/// Inspect a wire-format preamble.
119///
120/// @param bytes - At least 24 bytes; the first 24 bytes of a message.
121#[wasm_bindgen]
122pub fn read_preamble_info(bytes: &[u8]) -> Result<JsValue, JsValue> {
123    if bytes.len() < PREAMBLE_SIZE {
124        return Err(JsValue::from(js_sys::Error::new(&format!(
125            "preamble buffer too short: {} < {PREAMBLE_SIZE}",
126            bytes.len()
127        ))));
128    }
129    let pre = Preamble::read_from(&bytes[..PREAMBLE_SIZE]).map_err(js_err)?;
130    let flags = pre.flags;
131    to_js(&PreambleInfoJs {
132        version: pre.version,
133        flags: flags.bits(),
134        total_length: pre.total_length,
135        has_header_metadata: flags.has(MessageFlags::HEADER_METADATA),
136        has_header_index: flags.has(MessageFlags::HEADER_INDEX),
137        has_footer_metadata: flags.has(MessageFlags::FOOTER_METADATA),
138        has_footer_index: flags.has(MessageFlags::FOOTER_INDEX),
139        has_preceder_metadata: flags.has(MessageFlags::PRECEDER_METADATA),
140        hashes_present: flags.has(MessageFlags::HASHES_PRESENT),
141    })
142}
143
144/// Inspect a wire-format postamble.
145///
146/// @param bytes - At least 24 bytes; typically the last 24 bytes of a message.
147#[wasm_bindgen]
148pub fn read_postamble_info(bytes: &[u8]) -> Result<JsValue, JsValue> {
149    if bytes.len() < POSTAMBLE_SIZE {
150        return Err(JsValue::from(js_sys::Error::new(&format!(
151            "postamble buffer too short: {} < {POSTAMBLE_SIZE}",
152            bytes.len()
153        ))));
154    }
155    let pa = Postamble::read_from(&bytes[bytes.len() - POSTAMBLE_SIZE..]).map_err(js_err)?;
156    let tail = &bytes[bytes.len() - core::wire::END_MAGIC.len()..];
157    to_js(&PostambleInfoJs {
158        first_footer_offset: pa.first_footer_offset,
159        total_length: pa.total_length,
160        end_magic_ok: tail == core::wire::END_MAGIC,
161    })
162}
163
164// ── Header / footer chunk parsers ────────────────────────────────────────────
165
166/// Parse header metadata + index from a chunk that starts at message
167/// offset 0 (the preamble).  Stops at the first data-object or
168/// preceder frame.  Returns nulls for frames that fall outside the
169/// supplied chunk so TS can decide to widen its Range fetch.
170#[wasm_bindgen]
171pub fn parse_header_chunk(chunk: &[u8]) -> Result<JsValue, JsValue> {
172    if chunk.len() < PREAMBLE_SIZE {
173        return Err(JsValue::from(js_sys::Error::new(&format!(
174            "header chunk too short: {} < {PREAMBLE_SIZE}",
175            chunk.len()
176        ))));
177    }
178    if &chunk[..MAGIC.len()] != MAGIC {
179        return Err(JsValue::from(js_sys::Error::new("header chunk does not start with TENSOGRM")));
180    }
181
182    let mut metadata: Option<core::GlobalMetadata> = None;
183    let mut index: Option<IndexFrameJs> = None;
184    let mut body_start: Option<u64> = None;
185
186    let mut pos = PREAMBLE_SIZE;
187    while pos + FRAME_HEADER_SIZE <= chunk.len() {
188        if &chunk[pos..pos + 2] != core::wire::FRAME_MAGIC {
189            pos += 1;
190            continue;
191        }
192        let fh = FrameHeader::read_from(&chunk[pos..]).map_err(js_err)?;
193        let total = match usize::try_from(fh.total_length) {
194            Ok(t) => t,
195            Err(_) => return Err(JsValue::from(js_sys::Error::new("frame total_length exceeds usize"))),
196        };
197        if total < FRAME_HEADER_SIZE + FRAME_END.len() {
198            return Err(JsValue::from(js_sys::Error::new("frame total_length below minimum")));
199        }
200        let frame_end = match pos.checked_add(total) {
201            Some(e) => e,
202            None => return Err(JsValue::from(js_sys::Error::new("frame end overflows"))),
203        };
204        if frame_end > chunk.len() {
205            break;
206        }
207
208        if matches!(
209            fh.frame_type,
210            FrameType::NTensorFrame | FrameType::PrecederMetadata
211        ) {
212            body_start = Some(pos as u64);
213            break;
214        }
215
216        if &chunk[frame_end - FRAME_END.len()..frame_end] != FRAME_END {
217            return Err(JsValue::from(js_sys::Error::new("header frame missing ENDF trailer")));
218        }
219
220        let payload = &chunk[pos + FRAME_HEADER_SIZE..frame_end - FRAME_COMMON_FOOTER_SIZE];
221        match fh.frame_type {
222            FrameType::HeaderMetadata => {
223                metadata = Some(cbor_to_global_metadata(payload).map_err(js_err)?);
224            }
225            FrameType::HeaderIndex => {
226                let idx = cbor_to_index(payload).map_err(js_err)?;
227                index = Some(IndexFrameJs {
228                    offsets: idx.offsets,
229                    lengths: idx.lengths,
230                });
231            }
232            _ => {}
233        }
234
235        let aligned = (frame_end + 7) & !7;
236        pos = aligned.min(chunk.len());
237    }
238
239    let out = js_sys::Object::new();
240    set_metadata_and_index(&out, metadata, index)?;
241    let body_start_val = match body_start {
242        Some(b) => JsValue::from(b),
243        None => JsValue::NULL,
244    };
245    js_sys::Reflect::set(&out, &JsValue::from_str("body_start"), &body_start_val)
246        .map_err(|_| JsValue::from(js_sys::Error::new("internal: failed to set header_chunk.body_start")))?;
247    Ok(out.into())
248}
249
250/// Parse footer metadata + index from a chunk that covers the footer
251/// region — i.e. `[first_footer_offset, message_end - POSTAMBLE_SIZE)`.
252#[wasm_bindgen]
253pub fn parse_footer_chunk(chunk: &[u8]) -> Result<JsValue, JsValue> {
254    let mut metadata: Option<core::GlobalMetadata> = None;
255    let mut index: Option<IndexFrameJs> = None;
256
257    let mut pos = 0usize;
258    while pos + FRAME_HEADER_SIZE <= chunk.len() {
259        if &chunk[pos..pos + 2] != core::wire::FRAME_MAGIC {
260            pos += 1;
261            continue;
262        }
263        let fh = FrameHeader::read_from(&chunk[pos..]).map_err(js_err)?;
264        let total = match usize::try_from(fh.total_length) {
265            Ok(t) => t,
266            Err(_) => return Err(JsValue::from(js_sys::Error::new("footer frame total_length exceeds usize"))),
267        };
268        if total < FRAME_HEADER_SIZE + FRAME_END.len() {
269            return Err(JsValue::from(js_sys::Error::new("footer frame total_length below minimum")));
270        }
271        let frame_end = match pos.checked_add(total) {
272            Some(e) if e <= chunk.len() => e,
273            _ => break,
274        };
275
276        if &chunk[frame_end - FRAME_END.len()..frame_end] != FRAME_END {
277            return Err(JsValue::from(js_sys::Error::new("footer frame missing ENDF trailer")));
278        }
279
280        let payload = &chunk[pos + FRAME_HEADER_SIZE..frame_end - FRAME_COMMON_FOOTER_SIZE];
281        match fh.frame_type {
282            FrameType::FooterMetadata => {
283                metadata = Some(cbor_to_global_metadata(payload).map_err(js_err)?);
284            }
285            FrameType::FooterIndex => {
286                let idx = cbor_to_index(payload).map_err(js_err)?;
287                index = Some(IndexFrameJs {
288                    offsets: idx.offsets,
289                    lengths: idx.lengths,
290                });
291            }
292            _ => {}
293        }
294
295        let aligned = (frame_end + 7) & !7;
296        pos = aligned.min(chunk.len());
297    }
298
299    let out = js_sys::Object::new();
300    set_metadata_and_index(&out, metadata, index)?;
301    Ok(out.into())
302}
303
304// ── Per-frame header + footer + descriptor CBOR ──────────────────────────────
305
306/// Parse the 16-byte header of a data-object frame.
307#[wasm_bindgen]
308pub fn read_data_object_frame_header(bytes: &[u8]) -> Result<JsValue, JsValue> {
309    if bytes.len() < FRAME_HEADER_SIZE {
310        return Err(JsValue::from(js_sys::Error::new(&format!(
311            "frame header buffer too short: {} < {FRAME_HEADER_SIZE}",
312            bytes.len()
313        ))));
314    }
315    let fh = FrameHeader::read_from(&bytes[..FRAME_HEADER_SIZE]).map_err(js_err)?;
316    to_js(&FrameHeaderJs {
317        frame_type: fh.frame_type as u16,
318        version: fh.version,
319        flags: fh.flags,
320        total_length: fh.total_length,
321        is_data_object: fh.frame_type.is_data_object(),
322        cbor_after_payload: fh.flags & DataObjectFlags::CBOR_AFTER_PAYLOAD != 0,
323    })
324}
325
326/// Parse the 20-byte footer of a data-object frame (type `NTensorFrame`).
327///
328/// Expects the caller to supply the last 20 bytes of the frame.  Returns
329/// the in-frame `cbor_offset` (where the descriptor CBOR lives) and the
330/// 64-bit frame hash, plus a sanity flag on the `ENDF` trailer.
331#[wasm_bindgen]
332pub fn read_data_object_frame_footer(bytes: &[u8]) -> Result<JsValue, JsValue> {
333    if bytes.len() < DATA_OBJECT_FOOTER_SIZE {
334        return Err(JsValue::from(js_sys::Error::new(&format!(
335            "data-object frame footer too short: {} < {DATA_OBJECT_FOOTER_SIZE}",
336            bytes.len()
337        ))));
338    }
339    let footer = &bytes[bytes.len() - DATA_OBJECT_FOOTER_SIZE..];
340    let cbor_offset = u64::from_be_bytes(
341        footer[..8]
342            .try_into()
343            .map_err(|_| JsValue::from(js_sys::Error::new("cbor_offset truncated")))?,
344    );
345    let hash = u64::from_be_bytes(
346        footer[8..16]
347            .try_into()
348            .map_err(|_| JsValue::from(js_sys::Error::new("hash truncated")))?,
349    );
350    let end = &footer[16..20];
351    to_js(&DataObjectFooterJs {
352        cbor_offset,
353        hash_hex: format!("{hash:016x}"),
354        end_magic_ok: end == FRAME_END,
355    })
356}
357
358/// Decode a `DataObjectDescriptor` from its raw CBOR bytes.
359#[wasm_bindgen]
360pub fn parse_descriptor_cbor(cbor_bytes: &[u8]) -> Result<JsValue, JsValue> {
361    let desc = cbor_to_object_descriptor(cbor_bytes).map_err(js_err)?;
362    to_js(&desc)
363}
364
365// ── Single-frame decode ──────────────────────────────────────────────────────
366
367/// Decode a single data-object frame's full bytes to a `DecodedMessage`
368/// that owns one decoded object.
369///
370/// @param verify_hash - Per-frame hash verification (default false).
371///                      See `crate::decode` for the contract.
372#[wasm_bindgen]
373pub fn decode_object_from_frame(
374    frame_bytes: &[u8],
375    restore_non_finite: Option<bool>,
376    verify_hash: Option<bool>,
377) -> Result<DecodedMessage, JsValue> {
378    let options = DecodeOptions {
379        restore_non_finite: restore_non_finite.unwrap_or(true),
380        verify_hash: verify_hash.unwrap_or(false),
381        ..Default::default()
382    };
383    let (desc, data) =
384        core::decode::decode_object_from_frame(frame_bytes, &options).map_err(js_err)?;
385    Ok(DecodedMessage::from_single_object(desc, data))
386}
387
388/// Extract ranges from a single data-object frame.
389///
390/// Mirrors [`crate::decode_range`] but takes one frame's bytes.
391#[wasm_bindgen]
392pub fn decode_range_from_frame(
393    frame_bytes: &[u8],
394    ranges: &js_sys::BigUint64Array,
395) -> Result<JsValue, JsValue> {
396    let flat: Vec<u64> = ranges.to_vec();
397    if !flat.len().is_multiple_of(2) {
398        return Err(JsValue::from(js_sys::Error::new(
399            "ranges length must be a multiple of 2 (flat [offset, count] pairs)",
400        )));
401    }
402    let range_pairs: Vec<(u64, u64)> = flat.chunks_exact(2).map(|w| (w[0], w[1])).collect();
403
404    let options = DecodeOptions {
405        ..Default::default()
406    };
407    let (descriptor, parts) =
408        core::decode::decode_range_from_frame(frame_bytes, &range_pairs, &options)
409            .map_err(js_err)?;
410
411    let result = js_sys::Object::new();
412    js_sys::Reflect::set(&result, &"descriptor".into(), &to_js(&descriptor)?)
413        .map_err(|_| JsValue::from(js_sys::Error::new("failed to set descriptor")))?;
414    let parts_js = js_sys::Array::new_with_length(parts.len() as u32);
415    for (i, bytes) in parts.iter().enumerate() {
416        parts_js.set(i as u32, js_sys::Uint8Array::from(bytes.as_slice()).into());
417    }
418    js_sys::Reflect::set(&result, &"parts".into(), &parts_js)
419        .map_err(|_| JsValue::from(js_sys::Error::new("failed to set parts")))?;
420    Ok(result.into())
421}