Skip to main content

tensogram_ffi/
lib.rs

1// (C) Copyright 2026- ECMWF and individual contributors.
2//
3// This software is licensed under the terms of the Apache Licence Version 2.0
4// which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5// In applying this licence, ECMWF does not waive the privileges and immunities
6// granted to it by virtue of its status as an intergovernmental organisation nor
7// does it submit to any jurisdiction.
8
9// FFI functions accept raw pointers by design — callers are responsible for
10// validity. Marking every extern "C" fn as `unsafe` would be correct but
11// makes cbindgen emit ugly signatures with no benefit to C callers.
12#![allow(clippy::not_unsafe_ptr_arg_deref)]
13
14//! Tensogram C FFI
15//!
16//! Exposes the tensogram library to C and C++ callers via opaque handles,
17//! typed accessor functions, and a flat C ABI.
18//!
19//! Memory ownership rules:
20//! - Handles returned by `tgm_*` functions are owned by the caller.
21//!   Free them with the matching `tgm_*_free` function.
22//! - Pointers returned by accessor functions (e.g. `tgm_object_shape`) are
23//!   borrowed from the handle and valid until the handle is freed.
24//! - `tgm_bytes_t` returned by encode functions must be freed with `tgm_bytes_free`.
25//!
26//! ## JSON schema for `tgm_encode`
27//!
28//! The `metadata_json` argument to `tgm_encode` is a JSON object with:
29//! - `"descriptors"` (array, required): one entry per data object. Each entry
30//!   merges tensor info and encoding pipeline info into a single object:
31//!   `type`, `ndim`, `shape`, `strides`, `dtype`, `byte_order`, `encoding`,
32//!   `filter`, `compression`. Additional keys are stored as params.
33//! - `"base"` (array, optional): per-object application metadata.
34//! - Any other top-level keys (e.g. `"mars"`) are stored under the
35//!   message-level `_extra_` map.  The CBOR metadata frame is free-form —
36//!   the wire-format version lives exclusively in the preamble (see
37//!   `plans/WIRE_FORMAT.md` §3) and must NOT be supplied by callers.  A
//!   legacy `"version"` top-level field is tolerated for pre-0.17 schema
//!   compatibility and kept only as a free-form `_extra_["version"]` entry.
40
41use std::collections::BTreeMap;
42use std::ffi::{CStr, CString};
43use std::os::raw::c_char;
44use std::path::Path;
45use std::ptr;
46use std::slice;
47
48use tensogram::encode::MaskMethod;
49use tensogram::validate::{
50    ValidateOptions, ValidationLevel, validate_file as core_validate_file, validate_message,
51};
52use tensogram::{
53    DataObjectDescriptor, DecodeOptions, EncodeOptions, GlobalMetadata, RESERVED_KEY,
54    StreamingEncoder, TensogramError, TensogramFile, decode, decode_metadata, decode_object,
55    decode_range, encode, encode_pre_encoded, parse_hash_name, scan,
56};
57
58// ---------------------------------------------------------------------------
59// Constants
60// ---------------------------------------------------------------------------
61
/// Wire-format version emitted and required by this build of the
/// library.
///
/// Mirrors `tensogram::WIRE_VERSION`.  The value lives in the
/// tensogram **preamble** (see `plans/WIRE_FORMAT.md` §3) — never in
/// the CBOR metadata frame.  Exposed here so C / C++ callers can
/// reference it without decoding a message first.  cbindgen emits
/// this as a `#define TGM_WIRE_VERSION 3` in the generated header.
///
/// The literal is duplicated rather than re-exported because
/// cbindgen source-parses this crate and cannot resolve cross-crate
/// constant expressions; the `const _: () = assert!(..)` item
/// directly below keeps the two in lockstep at compile time.
pub const TGM_WIRE_VERSION: u16 = 3;
// Compile-time guard: the build fails if the mirrored literal above
// ever drifts from the core crate's constant.
const _: () = assert!(
    TGM_WIRE_VERSION == tensogram::WIRE_VERSION,
    "TGM_WIRE_VERSION must equal tensogram::WIRE_VERSION"
);
80
81// ---------------------------------------------------------------------------
82// Error codes
83// ---------------------------------------------------------------------------
84
/// Status code handed back across the C ABI (for example by
/// `tgm_encode`).  `Ok` is zero; every other value is a failure whose
/// human-readable description is available through `tgm_last_error()`.
///
/// The numeric values are part of the C ABI and must never be
/// renumbered.  The derives make codes cheap to copy, compare and
/// print from Rust without affecting the C layout.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TgmError {
    /// Success.
    Ok = 0,
    /// Mapped from `TensogramError::Framing`.
    Framing = 1,
    /// Mapped from `TensogramError::Metadata`; also used when the
    /// encode metadata JSON cannot be parsed.
    Metadata = 2,
    /// Mapped from `TensogramError::Encoding`; also the catch-all for
    /// core error variants this FFI layer does not know yet.
    Encoding = 3,
    /// Mapped from `TensogramError::Compression`.
    Compression = 4,
    /// Mapped from `TensogramError::Object`.
    Object = 5,
    /// Mapped from `TensogramError::Io`.
    Io = 6,
    /// Mapped from `TensogramError::HashMismatch`.
    HashMismatch = 7,
    /// A caller-supplied argument was rejected (null pointer, invalid
    /// UTF-8, mismatched array length, unknown option name, …).
    InvalidArg = 8,
    /// Returned by `tgm_*_iter_next` when iteration is exhausted.
    EndOfIter = 9,
    /// Mapped from `TensogramError::Remote`.
    Remote = 10,
    /// Decode-time hash verification was requested but the frame's
    /// `HASH_PRESENT` flag is clear (see `plans/WIRE_FORMAT.md` §2.5).
    /// Distinct from `HashMismatch` — the digest didn't disagree;
    /// there was no digest recorded to compare against.  The
    /// offending object index is available through
    /// `tgm_last_error_object_index()` for the duration of the
    /// thread-local error.
    MissingHash = 11,
}
108
109fn to_error_code(e: &TensogramError) -> TgmError {
110    match e {
111        TensogramError::Framing(_) => TgmError::Framing,
112        TensogramError::Metadata(_) => TgmError::Metadata,
113        TensogramError::Encoding(_) => TgmError::Encoding,
114        TensogramError::Compression(_) => TgmError::Compression,
115        TensogramError::Object(_) => TgmError::Object,
116        TensogramError::Io(_) => TgmError::Io,
117        TensogramError::HashMismatch { .. } => TgmError::HashMismatch,
118        TensogramError::MissingHash { .. } => TgmError::MissingHash,
119        TensogramError::Remote(_) => TgmError::Remote,
120        // `TensogramError` is `#[non_exhaustive]` so future variants
121        // can land without breaking this mapping at compile time.
122        // Until a more specific FFI code is needed, route unknown
123        // variants to the catch-all `Encoding` bucket — every
124        // existing variant maps cleanly today, and any new variant
125        // surfaces as a stable error code that callers can still
126        // handle generically through `tgm_last_error()`.
127        _ => TgmError::Encoding,
128    }
129}
130
// Thread-local storage for the last error message.  Each thread has its
// own slot, so an error recorded on one thread is never observed by
// another.
thread_local! {
    static LAST_ERROR: std::cell::RefCell<Option<CString>> = const { std::cell::RefCell::new(None) };
}

/// Record `msg` as the calling thread's last error, replacing any
/// previous message.
///
/// C strings cannot carry interior NUL bytes.  Rather than dropping
/// such a message entirely (which would make `tgm_last_error()` report
/// "no error" right after a failure), the NUL bytes are stripped and
/// the remaining text is stored.
fn set_last_error(msg: &str) {
    let stored = CString::new(msg).unwrap_or_else(|nul_err| {
        // Recover the original bytes from the error and remove every
        // NUL so the second conversion cannot fail.
        let mut bytes = nul_err.into_vec();
        bytes.retain(|&b| b != 0);
        CString::new(bytes).unwrap_or_default()
    });
    LAST_ERROR.with(|cell| {
        *cell.borrow_mut() = Some(stored);
    });
}
141
142/// Returns a pointer to the last error message, or NULL if no error.
143/// The pointer is valid until the next FFI call on the same thread.
144#[unsafe(no_mangle)]
145pub extern "C" fn tgm_last_error() -> *const c_char {
146    LAST_ERROR.with(|cell| {
147        cell.borrow()
148            .as_ref()
149            .map(|s| s.as_ptr())
150            .unwrap_or(ptr::null())
151    })
152}
153
154// ---------------------------------------------------------------------------
155// Byte buffer
156// ---------------------------------------------------------------------------
157
/// An owned byte buffer returned by encode functions.
///
/// `tgm_encode` fills it with a heap allocation whose capacity equals
/// `len`, and `tgm_bytes_free` relies on exactly that when it
/// reconstructs the allocation.  Callers must therefore pass the struct
/// back unmodified and must not release `data` any other way.
#[repr(C)]
pub struct TgmBytes {
    /// Start of the buffer.  `tgm_bytes_free` treats NULL as a no-op.
    pub data: *mut u8,
    /// Number of bytes stored at `data`.
    pub len: usize,
}
164
/// Mask-companion options for encode entry points (see
/// `plans/WIRE_FORMAT.md` §6.5 and `docs/src/guide/nan-inf-handling.md`).
/// Pass a pointer to this struct to opt into NaN / ±Inf substitution
/// with bitmask companion frames.
///
/// Each `*_mask_method` string is one of `"none"`, `"rle"`,
/// `"roaring"`, `"lz4"`, `"zstd"`, or `"blosc2"`; pass `NULL` to use
/// the library default (`"roaring"`).  Unknown names cause the
/// owning `tgm_*_with_options` call to return
/// [`TgmError::InvalidArg`] with a clear message via
/// [`tgm_last_error`].
///
/// `small_mask_threshold_bytes` is the byte-count below which mask
/// blobs are written as `"none"` regardless of the requested method
/// (auto-fallback).  Pass `0` to disable the fallback.  Negative
/// values leave the encoder's existing setting untouched, i.e. the
/// library default (128) unless it was changed elsewhere.
///
/// NOTE(review): `allow_nan` / `allow_inf` are Rust `bool`s read
/// straight from caller memory, so C callers should only ever store
/// `0` or `1` in them (use C `bool` / `_Bool`).
#[repr(C)]
pub struct TgmEncodeMaskOptions {
    /// Copied verbatim into `EncodeOptions::allow_nan`.
    pub allow_nan: bool,
    /// Copied verbatim into `EncodeOptions::allow_inf`.
    pub allow_inf: bool,
    /// Mask method name for NaN positions, or NULL for the default.
    pub nan_mask_method: *const c_char,
    /// Mask method name for +Inf positions, or NULL for the default.
    pub pos_inf_mask_method: *const c_char,
    /// Mask method name for -Inf positions, or NULL for the default.
    pub neg_inf_mask_method: *const c_char,
    /// Auto-fallback threshold in bytes; only applied when `>= 0`.
    pub small_mask_threshold_bytes: isize,
}
190
191/// Parse one of the optional C-string mask-method fields into a Rust
192/// [`MaskMethod`].  Returns the caller-supplied default on `NULL`,
193/// an `Err` naming the offending value (and accepted alternatives)
194/// for invalid UTF-8 or unknown names.
195///
196/// # Safety
197///
198/// `ptr` must either be `NULL` or point to a NUL-terminated UTF-8
199/// string with a valid Rust-bound lifetime.
200unsafe fn parse_mask_method_cstr(
201    ptr: *const c_char,
202    default: MaskMethod,
203) -> Result<MaskMethod, String> {
204    if ptr.is_null() {
205        return Ok(default);
206    }
207    let s = unsafe { CStr::from_ptr(ptr) }
208        .to_str()
209        .map_err(|_| "mask method name is not valid UTF-8".to_string())?;
210    MaskMethod::from_name(s).map_err(|e| e.to_string())
211}
212
213/// Apply the optional [`TgmEncodeMaskOptions`] pointer to an
214/// [`EncodeOptions`].  `NULL` is a no-op.  Returns an error message
215/// (routed to [`set_last_error`] by the caller) when a method name
216/// is invalid UTF-8 or unknown.
217///
218/// # Safety
219///
220/// `opts` must either be `NULL` or point to a valid
221/// `TgmEncodeMaskOptions` whose `*_mask_method` fields satisfy
222/// [`parse_mask_method_cstr`]'s safety contract.
223unsafe fn apply_mask_options(
224    encode_opts: &mut EncodeOptions,
225    opts: *const TgmEncodeMaskOptions,
226) -> Result<(), String> {
227    if opts.is_null() {
228        return Ok(());
229    }
230    let opts = unsafe { &*opts };
231    encode_opts.allow_nan = opts.allow_nan;
232    encode_opts.allow_inf = opts.allow_inf;
233    encode_opts.nan_mask_method =
234        unsafe { parse_mask_method_cstr(opts.nan_mask_method, MaskMethod::default())? };
235    encode_opts.pos_inf_mask_method =
236        unsafe { parse_mask_method_cstr(opts.pos_inf_mask_method, MaskMethod::default())? };
237    encode_opts.neg_inf_mask_method =
238        unsafe { parse_mask_method_cstr(opts.neg_inf_mask_method, MaskMethod::default())? };
239    if opts.small_mask_threshold_bytes >= 0 {
240        encode_opts.small_mask_threshold_bytes = opts.small_mask_threshold_bytes as usize;
241    }
242    Ok(())
243}
244
/// Decode-side companion to [`TgmEncodeMaskOptions`].  Pass a pointer
/// to opt out of canonical NaN / Inf restoration.  Pass `NULL` for
/// the default `restore_non_finite = true`.
#[repr(C)]
pub struct TgmDecodeMaskOptions {
    /// Copied verbatim into `DecodeOptions::restore_non_finite`.
    /// C callers should only store `0` or `1` here (Rust `bool`).
    pub restore_non_finite: bool,
}
252
253/// Apply the optional [`TgmDecodeMaskOptions`] pointer to a
254/// [`DecodeOptions`].  `NULL` is a no-op.
255///
256/// # Safety
257///
258/// `opts` must either be `NULL` or point to a valid
259/// `TgmDecodeMaskOptions`.
260unsafe fn apply_decode_mask_options(
261    decode_opts: &mut DecodeOptions,
262    opts: *const TgmDecodeMaskOptions,
263) {
264    if opts.is_null() {
265        return;
266    }
267    let opts = unsafe { &*opts };
268    decode_opts.restore_non_finite = opts.restore_non_finite;
269}
270
271/// Free a byte buffer returned by `tgm_encode`.
272#[unsafe(no_mangle)]
273pub extern "C" fn tgm_bytes_free(buf: TgmBytes) {
274    if !buf.data.is_null() {
275        unsafe {
276            drop(Vec::from_raw_parts(buf.data, buf.len, buf.len));
277        }
278    }
279}
280
281// ---------------------------------------------------------------------------
282// Opaque handles
283// ---------------------------------------------------------------------------
284
/// Decoded message: global metadata + decoded (descriptor, payload) pairs.
///
/// Besides the decoded data, the handle owns one cached `CString` per
/// object for every string-valued descriptor field, so accessors can
/// return `const char*` pointers that stay valid for as long as the
/// handle lives.  Every cache vector is index-aligned with `objects`.
pub struct TgmMessage {
    /// Message-level metadata.
    global_metadata: GlobalMetadata,
    /// Each entry pairs a per-object descriptor with its decoded payload bytes.
    objects: Vec<(DataObjectDescriptor, Vec<u8>)>,
    /// Cached CStrings for dtype accessor returns (parallel to `objects`).
    dtype_strings: Vec<CString>,
    /// Cached CStrings for object type accessor returns.
    type_strings: Vec<CString>,
    /// Cached CStrings for byte order accessor returns.
    byte_order_strings: Vec<CString>,
    /// Cached CStrings for filter accessor returns.
    filter_strings: Vec<CString>,
    /// Cached CStrings for compression accessor returns.
    compression_strings: Vec<CString>,
    /// Cached CStrings for encoding accessor returns.
    encoding_strings: Vec<CString>,
    /// Cached CStrings for hash type accessor returns (None when no hash).
    hash_type_strings: Vec<Option<CString>>,
    /// Cached CStrings for hash value accessor returns (None when no hash).
    hash_value_strings: Vec<Option<CString>>,
}
307
/// Metadata-only handle (no decoded payloads).
pub struct TgmMetadata {
    /// Message-level metadata this handle exposes.
    global_metadata: GlobalMetadata,
    /// Cache for string accessors (key → null-terminated value).
    /// Wrapped in a `RefCell` so it can be filled through a shared
    /// reference — presumably by accessors that only receive a
    /// `const` handle; those live outside this part of the file.
    cache: std::cell::RefCell<BTreeMap<String, CString>>,
}
314
/// File handle: wraps a core-library [`TensogramFile`] for C callers.
pub struct TgmFile {
    /// The underlying core-library file object.
    file: TensogramFile,
    /// Cached path string for `tgm_file_path`, kept as a `CString` so
    /// the accessor can hand out a pointer borrowed from the handle.
    path_string: CString,
}
321
/// One scan hit: an (offset, length) pair.  A scan result is an array
/// of these entries (see [`TgmScanResult`]).
// NOTE(review): presumably `offset` is the byte position of a message
// within the scanned buffer and `length` its size in bytes, mirroring
// the pairs returned by the core `scan` — confirm against the scan
// entry points.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct TgmScanEntry {
    /// First component of the pair.
    pub offset: usize,
    /// Second component of the pair.
    pub length: usize,
}
329
/// Opaque handle for scan results.
pub struct TgmScanResult {
    /// The scan hits owned by this handle.
    entries: Vec<TgmScanEntry>,
}
334
335// ---------------------------------------------------------------------------
336// JSON deserialization helpers for the encode API
337// ---------------------------------------------------------------------------
338
/// Intermediate struct used to parse the flat JSON provided to `tgm_encode`.
///
/// The caller passes a single JSON object that contains both global metadata
/// fields (any application-specific namespaced keys such as `"mars"`) and a
/// `"descriptors"` array of per-object descriptor objects.
///
/// A legacy `"version"` field is tolerated: `parse_encode_json` forwards
/// it into `_extra_["version"]` as a free-form annotation.  It never
/// influences the wire-format version, which lives in the preamble (see
/// `plans/WIRE_FORMAT.md` §3) and is not settable by callers.
#[derive(serde::Deserialize)]
struct EncodeJson {
    /// Legacy field: pre-0.17 callers wrote `{"version": 3, …}`.  v3
    /// does not interpret the value — it's parsed separately so the
    /// JSON schema stays backwards-compatible for third-party tools.
    /// `None` means the modern, free-form schema was used.
    // NOTE(review): typed as `u16`, so a non-integer or out-of-range
    // `"version"` presumably fails deserialization rather than being
    // carried along as a free-form value — confirm this is intended.
    #[serde(default)]
    version: Option<u16>,
    /// Per-object descriptors; defaults to an empty list when the key
    /// is absent.
    #[serde(default)]
    descriptors: Vec<DataObjectDescriptor>,
    /// Per-object metadata array (one entry per data object).
    #[serde(default)]
    base: Vec<BTreeMap<String, serde_json::Value>>,
    /// All remaining top-level keys become `GlobalMetadata::extra`.
    #[serde(flatten)]
    extra: BTreeMap<String, serde_json::Value>,
}
365
366/// Convert a `serde_json::Value` to a `ciborium::Value` for storage in
367/// `GlobalMetadata::extra`.
368fn json_to_cbor(v: serde_json::Value) -> ciborium::Value {
369    match v {
370        serde_json::Value::Null => ciborium::Value::Null,
371        serde_json::Value::Bool(b) => ciborium::Value::Bool(b),
372        serde_json::Value::Number(n) => {
373            if let Some(i) = n.as_i64() {
374                ciborium::Value::Integer(i.into())
375            } else if let Some(f) = n.as_f64() {
376                ciborium::Value::Float(f)
377            } else {
378                ciborium::Value::Null
379            }
380        }
381        serde_json::Value::String(s) => ciborium::Value::Text(s),
382        serde_json::Value::Array(arr) => {
383            ciborium::Value::Array(arr.into_iter().map(json_to_cbor).collect())
384        }
385        serde_json::Value::Object(map) => ciborium::Value::Map(
386            map.into_iter()
387                .map(|(k, v)| (ciborium::Value::Text(k), json_to_cbor(v)))
388                .collect(),
389        ),
390    }
391}
392
393/// Parse the flat JSON blob into a `GlobalMetadata` and a list of
394/// `DataObjectDescriptor`s.
395///
396/// The `"descriptors"` and `"base"` keys are consumed; all remaining
397/// keys — including a legacy top-level `"version"` — are forwarded
398/// into `GlobalMetadata::extra` as CBOR values.  The wire-format
399/// version itself lives in the preamble (see `plans/WIRE_FORMAT.md`
400/// §§3, 6.1); a caller-supplied `version` is treated as a free-form
401/// annotation, matching the Python / TypeScript / Rust-core contract.
402/// On key collision with an explicit `"_extra_"` / `"extra"` entry,
403/// the explicit entry wins — "explicit beats implicit".
404fn parse_encode_json(
405    json_str: &str,
406) -> Result<(GlobalMetadata, Vec<DataObjectDescriptor>), String> {
407    let parsed: EncodeJson = serde_json::from_str(json_str)
408        .map_err(|e| format!("failed to parse metadata JSON: {e}"))?;
409
410    let cbor_base: Vec<BTreeMap<String, ciborium::Value>> = parsed
411        .base
412        .into_iter()
413        .map(|entry| {
414            entry
415                .into_iter()
416                .map(|(k, v)| (k, json_to_cbor(v)))
417                .collect()
418        })
419        .collect();
420
421    // Validate: no _reserved_ keys in base entries (library-managed namespace)
422    for (i, entry) in cbor_base.iter().enumerate() {
423        if entry.contains_key(RESERVED_KEY) {
424            return Err(format!(
425                "base[{i}] must not contain '{RESERVED_KEY}' key — the encoder populates it"
426            ));
427        }
428    }
429
430    let cbor_extra = merge_flattened_extras_with_version(parsed.extra, parsed.version)?;
431
432    let global_metadata = GlobalMetadata {
433        base: cbor_base,
434        extra: cbor_extra,
435        ..Default::default()
436    };
437
438    Ok((global_metadata, parsed.descriptors))
439}
440
441/// Merge a flattened-catch-all JSON map and an optional legacy
442/// `version` integer into a single `_extra_` CBOR map under the
443/// free-form contract.
444///
445/// The `#[serde(flatten)]` catch-all sees any explicit
446/// `"_extra_"` / `"extra"` the caller supplied as just another
447/// top-level key.  To match the Python / TypeScript / Rust-core
448/// contract, we pull the explicit section out *first* and use it as
449/// the authoritative source; every other flattened key becomes a
450/// free-form entry that only fills slots the explicit section did
451/// not already claim.  `version` is handled the same way —
452/// `explicit beats implicit`.
453///
454/// The `"extra"` convenience alias (no underscores) is accepted for
455/// parity with the Python binding, which has supported both
456/// spellings since the 0.6 metadata refactor.  Using both is an
457/// error (ambiguous).
458fn merge_flattened_extras_with_version(
459    mut flattened: BTreeMap<String, serde_json::Value>,
460    legacy_version: Option<u16>,
461) -> Result<BTreeMap<String, ciborium::Value>, String> {
462    fn take_extra_map(
463        flattened: &mut BTreeMap<String, serde_json::Value>,
464        key: &str,
465    ) -> Result<Option<BTreeMap<String, serde_json::Value>>, String> {
466        match flattened.remove(key) {
467            None => Ok(None),
468            Some(serde_json::Value::Object(map)) => Ok(Some(map.into_iter().collect())),
469            Some(_) => Err(format!(
470                "'{key}' must be a JSON object when supplied at the top level"
471            )),
472        }
473    }
474
475    let under = take_extra_map(&mut flattened, "_extra_")?;
476    let plain = take_extra_map(&mut flattened, "extra")?;
477    let explicit_extra = match (under, plain) {
478        (Some(_), Some(_)) => {
479            return Err(
480                "both '_extra_' and 'extra' supplied at the top level — choose one".to_string(),
481            );
482        }
483        (Some(m), None) | (None, Some(m)) => m,
484        (None, None) => BTreeMap::new(),
485    };
486
487    let mut cbor_extra: BTreeMap<String, ciborium::Value> = explicit_extra
488        .into_iter()
489        .map(|(k, v)| (k, json_to_cbor(v)))
490        .collect();
491    for (k, v) in flattened {
492        // Explicit `_extra_` beats implicit free-form top-level keys
493        // on collision (explicit beats implicit).
494        cbor_extra.entry(k).or_insert_with(|| json_to_cbor(v));
495    }
496    if let Some(v) = legacy_version {
497        cbor_extra
498            .entry("version".to_string())
499            .or_insert_with(|| ciborium::Value::Integer(u64::from(v).into()));
500    }
501    Ok(cbor_extra)
502}
503
504// ---------------------------------------------------------------------------
505// Message cache builder
506// ---------------------------------------------------------------------------
507
/// Pre-built CString caches for all descriptor string fields.
///
/// Produced by `build_message_caches`; every vector holds exactly one
/// entry per object, in object order.
struct MessageCaches {
    /// Data type name of each object.
    dtype_strings: Vec<CString>,
    /// Object type name of each object.
    type_strings: Vec<CString>,
    /// `"big"` or `"little"` for each object.
    byte_order_strings: Vec<CString>,
    /// Filter name of each object.
    filter_strings: Vec<CString>,
    /// Compression name of each object.
    compression_strings: Vec<CString>,
    /// Encoding name of each object.
    encoding_strings: Vec<CString>,
    /// `"xxh3"` for objects with an inline hash, `None` otherwise.
    hash_type_strings: Vec<Option<CString>>,
    /// 16-digit lowercase hex digest for objects with an inline hash,
    /// `None` otherwise.
    hash_value_strings: Vec<Option<CString>>,
}
519
520/// Extract each data-object frame's inline hash slot from a
521/// single-message wire buffer, via the cheap
522/// [`tensogram::framing::data_object_inline_hashes`] walker.
523///
524/// Returns one entry per `NTensorFrame` in emission order:
525/// `Some(digest)` when the slot is populated, `None` when it
526/// is zero (message-level `HASHES_PRESENT = 0`, or the frame
527/// wasn't hashed).  Returns an empty `Vec` if the buffer
528/// doesn't contain a parseable message — the preceding
529/// `decode()` in every FFI caller will already have surfaced
530/// any structural error.
531///
532/// Cheaper than going through `decode_message` because it
533/// parses only frame headers, not CBOR descriptors.  Matters
534/// for messages with thousands of objects where we'd otherwise
535/// pay a CBOR-parse hit just to locate each inline slot.
536fn extract_inline_hashes(buf: &[u8]) -> Vec<Option<u64>> {
537    use tensogram::framing::{data_object_inline_hashes, scan};
538
539    let messages = scan(buf);
540    let Some(&(msg_off, msg_len)) = messages.first() else {
541        return Vec::new();
542    };
543    let msg = &buf[msg_off..msg_off + msg_len];
544    data_object_inline_hashes(msg).unwrap_or_default()
545}
546
547/// Build all CString caches from the object descriptors and
548/// inline hash slots.
549///
550/// `inline_hashes` is expected to be either empty (no hash data
551/// available — every per-object entry becomes `None`) or the
552/// same length as `objects`.  A longer or shorter slice is
553/// silently truncated / padded with `None` to match `objects`.
554fn build_message_caches(
555    objects: &[(DataObjectDescriptor, Vec<u8>)],
556    inline_hashes: &[Option<u64>],
557) -> MessageCaches {
558    let dtype_strings = objects
559        .iter()
560        .map(|(desc, _)| CString::new(desc.dtype.to_string()).unwrap_or_default())
561        .collect();
562    let type_strings = objects
563        .iter()
564        .map(|(desc, _)| CString::new(desc.obj_type.as_str()).unwrap_or_default())
565        .collect();
566    let byte_order_strings = objects
567        .iter()
568        .map(|(desc, _)| {
569            let s = match desc.byte_order {
570                tensogram::ByteOrder::Big => "big",
571                tensogram::ByteOrder::Little => "little",
572            };
573            CString::new(s).unwrap_or_default()
574        })
575        .collect();
576    let filter_strings = objects
577        .iter()
578        .map(|(desc, _)| CString::new(desc.filter.as_str()).unwrap_or_default())
579        .collect();
580    let compression_strings = objects
581        .iter()
582        .map(|(desc, _)| CString::new(desc.compression.as_str()).unwrap_or_default())
583        .collect();
584    let encoding_strings = objects
585        .iter()
586        .map(|(desc, _)| CString::new(desc.encoding.as_str()).unwrap_or_default())
587        .collect();
588
589    // v3: per-object hash lives in the frame footer's inline slot
590    // (see `plans/WIRE_FORMAT.md` §2.4).  When the caller has
591    // computed the inline slots, surface them through the two
592    // existing accessors; otherwise keep the entries `None`.
593    let hash_type_strings: Vec<Option<CString>> = (0..objects.len())
594        .map(|i| {
595            inline_hashes
596                .get(i)
597                .and_then(|h| h.as_ref())
598                .map(|_| CString::new("xxh3").unwrap_or_default())
599        })
600        .collect();
601    let hash_value_strings: Vec<Option<CString>> = (0..objects.len())
602        .map(|i| {
603            inline_hashes
604                .get(i)
605                .and_then(|h| h.as_ref())
606                .map(|digest| CString::new(format!("{digest:016x}")).unwrap_or_default())
607        })
608        .collect();
609
610    MessageCaches {
611        dtype_strings,
612        type_strings,
613        byte_order_strings,
614        filter_strings,
615        compression_strings,
616        encoding_strings,
617        hash_type_strings,
618        hash_value_strings,
619    }
620}
621
622// ---------------------------------------------------------------------------
623// Shared encode argument parsing
624// ---------------------------------------------------------------------------
625
/// Parsed and validated arguments shared by `tgm_encode` and `tgm_file_append`.
///
/// The `'a` lifetime ties `data_slices` to the caller-owned buffers the
/// raw pointers referred to; it is chosen by the caller of
/// `parse_encode_args`, so the value must not outlive those buffers.
struct ParsedEncode<'a> {
    /// Message-level metadata parsed from the JSON argument.
    global_metadata: GlobalMetadata,
    /// One descriptor per data object; same length as `data_slices`.
    descriptors: Vec<DataObjectDescriptor>,
    /// Borrowed views of the caller's raw payload buffers.
    data_slices: Vec<&'a [u8]>,
    /// Encode options derived from the `hash_algo` and `threads`
    /// arguments; all other fields hold their defaults.
    options: EncodeOptions,
}
633
634/// Parse the hash algorithm from a nullable C string pointer.
635///
636/// Returns `Ok(false)` when the pointer is null (the C-FFI
637/// convention: `hash_algo = NULL` means "no hashing"), `Ok(true)`
638/// when the caller named the canonical algorithm `"xxh3"`,
639/// `Ok(false)` for the explicit `"none"`, and `Err((code, message))`
640/// on parse failure.
641///
642/// **Important — diverges from the Rust API default.** The Rust
643/// [`tensogram::parse_hash_name`] helper treats `None` as "use the
644/// default = hashing on", which matches `EncodeOptions::default()`.
645/// The FFI keeps the v2 convention `NULL → off` because the C-call
646/// idiom is "if you want a feature, name it; if you don't, pass
647/// NULL" and the FFI's `tgm_encode_*` functions always require an
648/// explicit `hash_algo` argument anyway.
649fn parse_hash_algo(hash_algo: *const c_char) -> Result<bool, (TgmError, String)> {
650    if hash_algo.is_null() {
651        return Ok(false);
652    }
653    let s = unsafe { CStr::from_ptr(hash_algo) }.to_str().map_err(|_| {
654        (
655            TgmError::InvalidArg,
656            "invalid UTF-8 in hash_algo".to_string(),
657        )
658    })?;
659    parse_hash_name(Some(s)).map_err(|e| (TgmError::InvalidArg, e.to_string()))
660}
661
662/// Collect data slices from parallel C arrays with null-pointer validation.
663///
664/// # Safety
665///
666/// `data_ptrs` and `data_lens` must point to valid arrays of at least
667/// `num_objects` elements. Each `data_ptrs[i]` must be valid for
668/// `data_lens[i]` bytes (or may be null only when `data_lens[i] == 0`).
669unsafe fn collect_data_slices<'a>(
670    data_ptrs: *const *const u8,
671    data_lens: *const usize,
672    num_objects: usize,
673) -> Result<Vec<&'a [u8]>, (TgmError, String)> {
674    if num_objects == 0 {
675        return Ok(vec![]);
676    }
677    if data_ptrs.is_null() || data_lens.is_null() {
678        return Err((
679            TgmError::InvalidArg,
680            "null data_ptrs or data_lens".to_string(),
681        ));
682    }
683    let ptrs = unsafe { slice::from_raw_parts(data_ptrs, num_objects) };
684    let lens = unsafe { slice::from_raw_parts(data_lens, num_objects) };
685    for (i, (&p, &l)) in ptrs.iter().zip(lens.iter()).enumerate() {
686        if p.is_null() && l > 0 {
687            return Err((
688                TgmError::InvalidArg,
689                format!("null data pointer at index {i}"),
690            ));
691        }
692    }
693    Ok(ptrs
694        .iter()
695        .zip(lens.iter())
696        .map(|(&p, &l)| {
697            if l == 0 {
698                // Avoid calling slice::from_raw_parts with a potentially null
699                // pointer when length is zero — that is UB even for zero-length
700                // slices per the Rust reference.
701                &[] as &[u8]
702            } else {
703                unsafe { slice::from_raw_parts(p, l) }
704            }
705        })
706        .collect())
707}
708
709/// Parse and validate the common arguments for `tgm_encode` / `tgm_file_append`.
710///
711/// # Safety
712///
713/// All pointer arguments must satisfy the same contracts as the public FFI
714/// functions that delegate to this helper.
715unsafe fn parse_encode_args<'a>(
716    json_str: &str,
717    data_ptrs: *const *const u8,
718    data_lens: *const usize,
719    num_objects: usize,
720    hash_algo: *const c_char,
721    threads: u32,
722) -> Result<ParsedEncode<'a>, (TgmError, String)> {
723    let (global_metadata, descriptors) =
724        parse_encode_json(json_str).map_err(|e| (TgmError::Metadata, e))?;
725
726    if descriptors.len() != num_objects {
727        return Err((
728            TgmError::InvalidArg,
729            format!(
730                "descriptors array length {} does not match num_objects {}",
731                descriptors.len(),
732                num_objects
733            ),
734        ));
735    }
736
737    let data_slices = unsafe { collect_data_slices(data_ptrs, data_lens, num_objects) }?;
738    let hashing = parse_hash_algo(hash_algo)?;
739    let options = EncodeOptions {
740        hashing,
741        threads,
742        ..Default::default()
743    };
744
745    Ok(ParsedEncode {
746        global_metadata,
747        descriptors,
748        data_slices,
749        options,
750    })
751}
752
753// ---------------------------------------------------------------------------
754// Encode
755// ---------------------------------------------------------------------------
756
757/// Encode a Tensogram message from JSON metadata and raw data slices.
758///
759/// `metadata_json`: null-terminated UTF-8 JSON string with:
760///   - `"descriptors"` (array of per-object descriptor objects, required)
761///   - `"base"` (array of per-object application metadata, optional)
762///   - any other top-level keys (e.g. `"mars"`) flow into `_extra_`
763///
764/// A legacy `"version"` top-level field is tolerated and silently
765/// discarded — the wire-format version lives in the preamble, not in
766/// the CBOR metadata frame (see `plans/WIRE_FORMAT.md` §§3, 6.1).
767///
768/// `data_ptrs` / `data_lens`: arrays of length `num_objects`, raw bytes per object.
769///
770/// `hash_algo`: null-terminated string ("xxh3") or NULL for no hash.
771///
772/// On success returns `TgmError::Ok` and fills `out` with the encoded bytes.
773/// The caller must free `out` with `tgm_bytes_free`.
774///
775/// 0.17+: encode rejects non-finite values (NaN / ±Inf) by default.
776/// Use [`tgm_encode_with_options`] with a
777/// [`TgmEncodeMaskOptions`] pointer (`allow_nan` / `allow_inf`) to
778/// opt into NaN / Inf substitution with bitmask companion frames;
779/// this entry point always uses the default reject policy.
780#[unsafe(no_mangle)]
781pub extern "C" fn tgm_encode(
782    metadata_json: *const c_char,
783    data_ptrs: *const *const u8,
784    data_lens: *const usize,
785    num_objects: usize,
786    hash_algo: *const c_char,
787    threads: u32,
788    out: *mut TgmBytes,
789) -> TgmError {
790    if metadata_json.is_null() || out.is_null() {
791        set_last_error("null argument");
792        return TgmError::InvalidArg;
793    }
794
795    let json_str = match unsafe { CStr::from_ptr(metadata_json) }.to_str() {
796        Ok(s) => s,
797        Err(e) => {
798            set_last_error(&format!("invalid UTF-8 in metadata_json: {e}"));
799            return TgmError::InvalidArg;
800        }
801    };
802
803    let parsed = match unsafe {
804        parse_encode_args(
805            json_str,
806            data_ptrs,
807            data_lens,
808            num_objects,
809            hash_algo,
810            threads,
811        )
812    } {
813        Ok(p) => p,
814        Err((code, msg)) => {
815            set_last_error(&msg);
816            return code;
817        }
818    };
819
820    // Build (descriptor, data) pairs for the encode API
821    let pairs: Vec<(&DataObjectDescriptor, &[u8])> = parsed
822        .descriptors
823        .iter()
824        .zip(parsed.data_slices.iter())
825        .map(|(d, s)| (d, *s))
826        .collect();
827
828    match encode(&parsed.global_metadata, &pairs, &parsed.options) {
829        Ok(bytes) => {
830            // Rebuild via boxed slice to guarantee capacity == len for tgm_bytes_free.
831            let mut bytes = bytes.into_boxed_slice().into_vec();
832            let result = TgmBytes {
833                data: bytes.as_mut_ptr(),
834                len: bytes.len(),
835            };
836            std::mem::forget(bytes); // ownership transferred to C
837            unsafe {
838                *out = result;
839            }
840            TgmError::Ok
841        }
842        Err(e) => {
843            set_last_error(&e.to_string());
844            to_error_code(&e)
845        }
846    }
847}
848
849/// Encode with explicit NaN / Inf mask-companion options.
850///
851/// Like [`tgm_encode`] but takes a [`TgmEncodeMaskOptions`] pointer
852/// (nullable — `NULL` behaves like [`tgm_encode`]'s default reject
853/// policy).  All other arguments are identical.
854#[unsafe(no_mangle)]
855#[allow(clippy::too_many_arguments)]
856pub extern "C" fn tgm_encode_with_options(
857    metadata_json: *const c_char,
858    data_ptrs: *const *const u8,
859    data_lens: *const usize,
860    num_objects: usize,
861    hash_algo: *const c_char,
862    threads: u32,
863    mask_options: *const TgmEncodeMaskOptions,
864    out: *mut TgmBytes,
865) -> TgmError {
866    if metadata_json.is_null() || out.is_null() {
867        set_last_error("null argument");
868        return TgmError::InvalidArg;
869    }
870
871    let json_str = match unsafe { CStr::from_ptr(metadata_json) }.to_str() {
872        Ok(s) => s,
873        Err(e) => {
874            set_last_error(&format!("invalid UTF-8 in metadata_json: {e}"));
875            return TgmError::InvalidArg;
876        }
877    };
878
879    let mut parsed = match unsafe {
880        parse_encode_args(
881            json_str,
882            data_ptrs,
883            data_lens,
884            num_objects,
885            hash_algo,
886            threads,
887        )
888    } {
889        Ok(p) => p,
890        Err((code, msg)) => {
891            set_last_error(&msg);
892            return code;
893        }
894    };
895    if let Err(msg) = unsafe { apply_mask_options(&mut parsed.options, mask_options) } {
896        set_last_error(&msg);
897        return TgmError::InvalidArg;
898    }
899
900    let pairs: Vec<(&DataObjectDescriptor, &[u8])> = parsed
901        .descriptors
902        .iter()
903        .zip(parsed.data_slices.iter())
904        .map(|(d, s)| (d, *s))
905        .collect();
906
907    match encode(&parsed.global_metadata, &pairs, &parsed.options) {
908        Ok(bytes) => {
909            let mut bytes = bytes.into_boxed_slice().into_vec();
910            let result = TgmBytes {
911                data: bytes.as_mut_ptr(),
912                len: bytes.len(),
913            };
914            std::mem::forget(bytes);
915            unsafe {
916                *out = result;
917            }
918            TgmError::Ok
919        }
920        Err(e) => {
921            set_last_error(&e.to_string());
922            to_error_code(&e)
923        }
924    }
925}
926
927/// Decode with explicit NaN / Inf restoration options.
928///
929/// Like [`tgm_decode`] but takes a [`TgmDecodeMaskOptions`] pointer
930/// (nullable — `NULL` behaves like [`tgm_decode`]'s default
931/// `restore_non_finite = true`).
932#[unsafe(no_mangle)]
933#[allow(clippy::too_many_arguments)]
934pub extern "C" fn tgm_decode_with_options(
935    buf: *const u8,
936    buf_len: usize,
937    native_byte_order: i32,
938    threads: u32,
939    verify_hash: i32,
940    mask_options: *const TgmDecodeMaskOptions,
941    out: *mut *mut TgmMessage,
942) -> TgmError {
943    if buf.is_null() || out.is_null() {
944        set_last_error("null argument");
945        return TgmError::InvalidArg;
946    }
947
948    let data = unsafe { slice::from_raw_parts(buf, buf_len) };
949    let mut options = DecodeOptions {
950        native_byte_order: native_byte_order != 0,
951        threads,
952        verify_hash: verify_hash != 0,
953        ..Default::default()
954    };
955    unsafe { apply_decode_mask_options(&mut options, mask_options) };
956
957    match decode(data, &options) {
958        Ok((global_metadata, objects)) => {
959            let inline_hashes = extract_inline_hashes(data);
960            let caches = build_message_caches(&objects, &inline_hashes);
961            let msg = Box::new(TgmMessage {
962                global_metadata,
963                objects,
964                dtype_strings: caches.dtype_strings,
965                type_strings: caches.type_strings,
966                byte_order_strings: caches.byte_order_strings,
967                filter_strings: caches.filter_strings,
968                compression_strings: caches.compression_strings,
969                encoding_strings: caches.encoding_strings,
970                hash_type_strings: caches.hash_type_strings,
971                hash_value_strings: caches.hash_value_strings,
972            });
973            unsafe {
974                *out = Box::into_raw(msg);
975            }
976            TgmError::Ok
977        }
978        Err(e) => {
979            set_last_error(&e.to_string());
980            to_error_code(&e)
981        }
982    }
983}
984
985/// Streaming-encoder constructor with NaN / Inf mask-companion options.
986///
987/// Like [`tgm_streaming_encoder_create`] but takes a
988/// [`TgmEncodeMaskOptions`] pointer.  `NULL` behaves like the default
989/// reject policy.
990#[unsafe(no_mangle)]
991#[allow(clippy::too_many_arguments)]
992pub extern "C" fn tgm_streaming_encoder_create_with_options(
993    path: *const c_char,
994    metadata_json: *const c_char,
995    hash_algo: *const c_char,
996    threads: u32,
997    mask_options: *const TgmEncodeMaskOptions,
998    out: *mut *mut TgmStreamingEncoder,
999) -> TgmError {
1000    // Delegate to the existing tgm_streaming_encoder_create for the
1001    // validation + file-creation side-effects, then — if that
1002    // succeeded AND the caller passed a non-NULL mask_options — no
1003    // further adjustment is needed because the encoder has been
1004    // constructed with default options.  For an opt-in mask-aware
1005    // encoder we take a direct path through StreamingEncoder::new
1006    // with the full options set.
1007    //
1008    // Rationale for the direct path: the library-level EncodeOptions
1009    // is snapshotted at construction in StreamingEncoder, so we can't
1010    // retrofit mask options after the fact.  We therefore replicate
1011    // the existing validation + open logic here, pointer-for-pointer.
1012    if path.is_null() || metadata_json.is_null() || out.is_null() {
1013        set_last_error("null argument");
1014        return TgmError::InvalidArg;
1015    }
1016    let path_str = match unsafe { CStr::from_ptr(path) }.to_str() {
1017        Ok(s) => s,
1018        Err(e) => {
1019            set_last_error(&format!("invalid UTF-8 in path: {e}"));
1020            return TgmError::InvalidArg;
1021        }
1022    };
1023    let json_str = match unsafe { CStr::from_ptr(metadata_json) }.to_str() {
1024        Ok(s) => s,
1025        Err(e) => {
1026            set_last_error(&format!("invalid UTF-8 in metadata_json: {e}"));
1027            return TgmError::InvalidArg;
1028        }
1029    };
1030    let global_metadata = match parse_streaming_metadata_json(json_str) {
1031        Ok(m) => m,
1032        Err(e) => {
1033            set_last_error(&e);
1034            return TgmError::Metadata;
1035        }
1036    };
1037    let hashing = match parse_hash_algo(hash_algo) {
1038        Ok(b) => b,
1039        Err((code, msg)) => {
1040            set_last_error(&msg);
1041            return code;
1042        }
1043    };
1044    let file = match std::fs::File::create(path_str) {
1045        Ok(f) => f,
1046        Err(e) => {
1047            set_last_error(&e.to_string());
1048            return TgmError::Io;
1049        }
1050    };
1051    let mut options = EncodeOptions {
1052        hashing,
1053        threads,
1054        ..Default::default()
1055    };
1056    if let Err(msg) = unsafe { apply_mask_options(&mut options, mask_options) } {
1057        set_last_error(&msg);
1058        return TgmError::InvalidArg;
1059    }
1060    let writer = std::io::BufWriter::new(file);
1061    match StreamingEncoder::new(writer, &global_metadata, &options) {
1062        Ok(enc) => {
1063            let handle = Box::new(TgmStreamingEncoder { inner: Some(enc) });
1064            unsafe {
1065                *out = Box::into_raw(handle);
1066            }
1067            TgmError::Ok
1068        }
1069        Err(e) => {
1070            set_last_error(&e.to_string());
1071            to_error_code(&e)
1072        }
1073    }
1074}
1075
1076/// Append a message to a file with explicit NaN / Inf mask-companion options.
1077///
1078/// Like [`tgm_file_append`] but takes a [`TgmEncodeMaskOptions`]
1079/// pointer.  `NULL` behaves like the default reject policy.
1080#[unsafe(no_mangle)]
1081#[allow(clippy::too_many_arguments)]
1082pub extern "C" fn tgm_file_append_with_options(
1083    file: *mut TgmFile,
1084    metadata_json: *const c_char,
1085    data_ptrs: *const *const u8,
1086    data_lens: *const usize,
1087    num_objects: usize,
1088    hash_algo: *const c_char,
1089    threads: u32,
1090    mask_options: *const TgmEncodeMaskOptions,
1091) -> TgmError {
1092    if file.is_null() || metadata_json.is_null() {
1093        set_last_error("null argument");
1094        return TgmError::InvalidArg;
1095    }
1096    let json_str = match unsafe { CStr::from_ptr(metadata_json) }.to_str() {
1097        Ok(s) => s,
1098        Err(e) => {
1099            set_last_error(&format!("invalid UTF-8 in metadata_json: {e}"));
1100            return TgmError::InvalidArg;
1101        }
1102    };
1103    let mut parsed = match unsafe {
1104        parse_encode_args(
1105            json_str,
1106            data_ptrs,
1107            data_lens,
1108            num_objects,
1109            hash_algo,
1110            threads,
1111        )
1112    } {
1113        Ok(p) => p,
1114        Err((code, msg)) => {
1115            set_last_error(&msg);
1116            return code;
1117        }
1118    };
1119    if let Err(msg) = unsafe { apply_mask_options(&mut parsed.options, mask_options) } {
1120        set_last_error(&msg);
1121        return TgmError::InvalidArg;
1122    }
1123    let pairs: Vec<(&DataObjectDescriptor, &[u8])> = parsed
1124        .descriptors
1125        .iter()
1126        .zip(parsed.data_slices.iter())
1127        .map(|(d, s)| (d, *s))
1128        .collect();
1129    let f = unsafe { &mut (*file).file };
1130    match f.append(&parsed.global_metadata, &pairs, &parsed.options) {
1131        Ok(()) => TgmError::Ok,
1132        Err(e) => {
1133            set_last_error(&e.to_string());
1134            to_error_code(&e)
1135        }
1136    }
1137}
1138
1139/// Encode a Tensogram message from JSON metadata and pre-encoded payload bytes.
1140///
1141/// Like `tgm_encode`, but each `data_ptrs[i]` slice must already be encoded
1142/// according to the matching descriptor's `encoding` / `filter` / `compression`
1143/// pipeline. The library does not run the encoding pipeline again — it writes
1144/// the caller-provided bytes directly into the wire-format payload after
1145/// validating that the descriptor's pipeline configuration is well-formed.
1146///
1147/// `metadata_json`: same flat JSON schema as `tgm_encode` (`version`,
1148///   `descriptors`, optional `base`, plus arbitrary extra top-level keys).
1149///
1150/// `data_ptrs` / `data_lens`: arrays of length `num_objects` pointing at
1151///   already-encoded payload bytes (one entry per descriptor).
1152///
1153/// `hash_algo`: null-terminated string ("xxh3") or NULL for no hash. The
1154///   library always recomputes the hash over the caller's bytes; any
1155///   `hash` field embedded in the descriptor JSON is ignored and overwritten.
1156///
1157/// Notes for compression-aware decoding:
1158/// - For `szip` compression, callers SHOULD include `szip_block_offsets`
1159///   (a list of bit offsets into the compressed payload) inside the
1160///   matching descriptor's params so that `tgm_decode_range` can locate
1161///   szip block boundaries without rescanning the compressed stream.
1162/// - Other pipeline params (e.g. `simple_packing` reference value, scale
1163///   factors) must also be present in the descriptor — they are not
1164///   inferred from the bytes.
1165///
1166/// On success returns `TgmError::Ok` and fills `out` with the encoded message.
1167/// The caller must free `out` with `tgm_bytes_free`.
1168#[unsafe(no_mangle)]
1169pub extern "C" fn tgm_encode_pre_encoded(
1170    metadata_json: *const c_char,
1171    data_ptrs: *const *const u8,
1172    data_lens: *const usize,
1173    num_objects: usize,
1174    hash_algo: *const c_char,
1175    threads: u32,
1176    out: *mut TgmBytes,
1177) -> TgmError {
1178    if metadata_json.is_null() || out.is_null() {
1179        set_last_error("null argument");
1180        return TgmError::InvalidArg;
1181    }
1182
1183    let json_str = match unsafe { CStr::from_ptr(metadata_json) }.to_str() {
1184        Ok(s) => s,
1185        Err(e) => {
1186            set_last_error(&format!("invalid UTF-8 in metadata_json: {e}"));
1187            return TgmError::InvalidArg;
1188        }
1189    };
1190
1191    let parsed = match unsafe {
1192        parse_encode_args(
1193            json_str,
1194            data_ptrs,
1195            data_lens,
1196            num_objects,
1197            hash_algo,
1198            threads,
1199        )
1200    } {
1201        Ok(p) => p,
1202        Err((code, msg)) => {
1203            set_last_error(&msg);
1204            return code;
1205        }
1206    };
1207
1208    // Build (descriptor, pre-encoded data) pairs for the pre-encoded API.
1209    let pairs: Vec<(&DataObjectDescriptor, &[u8])> = parsed
1210        .descriptors
1211        .iter()
1212        .zip(parsed.data_slices.iter())
1213        .map(|(d, s)| (d, *s))
1214        .collect();
1215
1216    match encode_pre_encoded(&parsed.global_metadata, &pairs, &parsed.options) {
1217        Ok(bytes) => {
1218            // Rebuild via boxed slice to guarantee capacity == len for tgm_bytes_free.
1219            let mut bytes = bytes.into_boxed_slice().into_vec();
1220            let result = TgmBytes {
1221                data: bytes.as_mut_ptr(),
1222                len: bytes.len(),
1223            };
1224            std::mem::forget(bytes); // ownership transferred to C
1225            unsafe {
1226                *out = result;
1227            }
1228            TgmError::Ok
1229        }
1230        Err(e) => {
1231            set_last_error(&e.to_string());
1232            to_error_code(&e)
1233        }
1234    }
1235}
1236
1237// ---------------------------------------------------------------------------
1238// Decode
1239// ---------------------------------------------------------------------------
1240
1241/// Decode a complete message (global metadata + all object payloads).
1242///
1243/// `buf` / `buf_len`: the wire-format message bytes.
1244///
1245/// On success, fills `out` with a `TgmMessage` handle.
1246/// Free with `tgm_message_free`.
1247#[unsafe(no_mangle)]
1248pub extern "C" fn tgm_decode(
1249    buf: *const u8,
1250    buf_len: usize,
1251    native_byte_order: i32,
1252    threads: u32,
1253    verify_hash: i32,
1254    out: *mut *mut TgmMessage,
1255) -> TgmError {
1256    if buf.is_null() || out.is_null() {
1257        set_last_error("null argument");
1258        return TgmError::InvalidArg;
1259    }
1260
1261    let data = unsafe { slice::from_raw_parts(buf, buf_len) };
1262    let options = DecodeOptions {
1263        native_byte_order: native_byte_order != 0,
1264        threads,
1265        verify_hash: verify_hash != 0,
1266        ..Default::default()
1267    };
1268
1269    match decode(data, &options) {
1270        Ok((global_metadata, objects)) => {
1271            let inline_hashes = extract_inline_hashes(data);
1272            let caches = build_message_caches(&objects, &inline_hashes);
1273            let msg = Box::new(TgmMessage {
1274                global_metadata,
1275                objects,
1276                dtype_strings: caches.dtype_strings,
1277                type_strings: caches.type_strings,
1278                byte_order_strings: caches.byte_order_strings,
1279                filter_strings: caches.filter_strings,
1280                compression_strings: caches.compression_strings,
1281                encoding_strings: caches.encoding_strings,
1282                hash_type_strings: caches.hash_type_strings,
1283                hash_value_strings: caches.hash_value_strings,
1284            });
1285            unsafe {
1286                *out = Box::into_raw(msg);
1287            }
1288            TgmError::Ok
1289        }
1290        Err(e) => {
1291            set_last_error(&e.to_string());
1292            to_error_code(&e)
1293        }
1294    }
1295}
1296
1297/// Decode only the global metadata (no payload bytes are read).
1298#[unsafe(no_mangle)]
1299pub extern "C" fn tgm_decode_metadata(
1300    buf: *const u8,
1301    buf_len: usize,
1302    out: *mut *mut TgmMetadata,
1303) -> TgmError {
1304    if buf.is_null() || out.is_null() {
1305        set_last_error("null argument");
1306        return TgmError::InvalidArg;
1307    }
1308
1309    let data = unsafe { slice::from_raw_parts(buf, buf_len) };
1310
1311    match decode_metadata(data) {
1312        Ok(global_metadata) => {
1313            let m = Box::new(TgmMetadata {
1314                global_metadata,
1315                cache: std::cell::RefCell::new(BTreeMap::new()),
1316            });
1317            unsafe {
1318                *out = Box::into_raw(m);
1319            }
1320            TgmError::Ok
1321        }
1322        Err(e) => {
1323            set_last_error(&e.to_string());
1324            to_error_code(&e)
1325        }
1326    }
1327}
1328
1329/// Decode a single object by index.
1330///
1331/// On success, fills `out` with a `TgmMessage` handle containing exactly
1332/// one object (at index 0). The global metadata covers the whole message.
1333#[unsafe(no_mangle)]
1334pub extern "C" fn tgm_decode_object(
1335    buf: *const u8,
1336    buf_len: usize,
1337    index: usize,
1338    native_byte_order: i32,
1339    threads: u32,
1340    verify_hash: i32,
1341    out: *mut *mut TgmMessage,
1342) -> TgmError {
1343    if buf.is_null() || out.is_null() {
1344        set_last_error("null argument");
1345        return TgmError::InvalidArg;
1346    }
1347
1348    let data = unsafe { slice::from_raw_parts(buf, buf_len) };
1349    let options = DecodeOptions {
1350        native_byte_order: native_byte_order != 0,
1351        threads,
1352        verify_hash: verify_hash != 0,
1353        ..Default::default()
1354    };
1355
1356    match decode_object(data, index, &options) {
1357        Ok((global_metadata, descriptor, obj_bytes)) => {
1358            let objects = vec![(descriptor, obj_bytes)];
1359            let inline_hashes = extract_inline_hashes(data);
1360            let caches = build_message_caches(&objects, &inline_hashes);
1361            let msg = Box::new(TgmMessage {
1362                global_metadata,
1363                objects,
1364                dtype_strings: caches.dtype_strings,
1365                type_strings: caches.type_strings,
1366                byte_order_strings: caches.byte_order_strings,
1367                filter_strings: caches.filter_strings,
1368                compression_strings: caches.compression_strings,
1369                encoding_strings: caches.encoding_strings,
1370                hash_type_strings: caches.hash_type_strings,
1371                hash_value_strings: caches.hash_value_strings,
1372            });
1373            unsafe {
1374                *out = Box::into_raw(msg);
1375            }
1376            TgmError::Ok
1377        }
1378        Err(e) => {
1379            set_last_error(&e.to_string());
1380            to_error_code(&e)
1381        }
1382    }
1383}
1384
1385/// Decode partial ranges from a data object.
1386///
1387/// `ranges_offsets` / `ranges_counts`: parallel arrays of (element_offset, element_count).
1388/// `num_ranges`: length of both arrays.
1389/// `join`: when non-zero, concatenate all ranges into a single buffer in `out[0]`
1390///         and set `*out_count = 1`.  When zero (split mode), write one `TgmBytes`
1391///         per range into `out[0..num_ranges]` and set `*out_count = num_ranges`.
1392///         The caller must pre-allocate `out` with at least `num_ranges` entries
1393///         when `join == 0`, or 1 entry when `join != 0`.
1394/// `out_count`: filled with the number of buffers written to `out`.
1395///
1396/// Free each returned buffer with `tgm_bytes_free`.
1397#[unsafe(no_mangle)]
1398#[allow(clippy::too_many_arguments)]
1399pub extern "C" fn tgm_decode_range(
1400    buf: *const u8,
1401    buf_len: usize,
1402    object_index: usize,
1403    ranges_offsets: *const u64,
1404    ranges_counts: *const u64,
1405    num_ranges: usize,
1406    native_byte_order: i32,
1407    threads: u32,
1408    join: i32,
1409    out: *mut TgmBytes,
1410    out_count: *mut usize,
1411) -> TgmError {
1412    if buf.is_null() || out.is_null() || out_count.is_null() {
1413        set_last_error("null argument");
1414        return TgmError::InvalidArg;
1415    }
1416    if num_ranges > 0 && (ranges_offsets.is_null() || ranges_counts.is_null()) {
1417        set_last_error("null ranges_offsets or ranges_counts");
1418        return TgmError::InvalidArg;
1419    }
1420
1421    let data = unsafe { slice::from_raw_parts(buf, buf_len) };
1422    let options = DecodeOptions {
1423        native_byte_order: native_byte_order != 0,
1424        threads,
1425        ..Default::default()
1426    };
1427
1428    let ranges: Vec<(u64, u64)> = if num_ranges == 0 {
1429        vec![]
1430    } else {
1431        unsafe {
1432            let offsets = slice::from_raw_parts(ranges_offsets, num_ranges);
1433            let counts = slice::from_raw_parts(ranges_counts, num_ranges);
1434            offsets
1435                .iter()
1436                .zip(counts.iter())
1437                .map(|(&o, &c)| (o, c))
1438                .collect()
1439        }
1440    };
1441
1442    match decode_range(data, object_index, &ranges, &options) {
1443        Ok((_, parts)) => {
1444            if join != 0 {
1445                // Concatenate all parts into a single buffer.
1446                let joined: Vec<u8> = parts.into_iter().flatten().collect();
1447                let mut joined = joined.into_boxed_slice().into_vec();
1448                let result = TgmBytes {
1449                    data: joined.as_mut_ptr(),
1450                    len: joined.len(),
1451                };
1452                std::mem::forget(joined);
1453                unsafe {
1454                    *out = result;
1455                    *out_count = 1;
1456                }
1457            } else {
1458                // Write one TgmBytes per range.
1459                let n = parts.len();
1460                for (i, part) in parts.into_iter().enumerate() {
1461                    let mut part = part.into_boxed_slice().into_vec();
1462                    let result = TgmBytes {
1463                        data: part.as_mut_ptr(),
1464                        len: part.len(),
1465                    };
1466                    std::mem::forget(part);
1467                    unsafe {
1468                        *out.add(i) = result;
1469                    }
1470                }
1471                unsafe {
1472                    *out_count = n;
1473                }
1474            }
1475            TgmError::Ok
1476        }
1477        Err(e) => {
1478            set_last_error(&e.to_string());
1479            to_error_code(&e)
1480        }
1481    }
1482}
1483
1484// ---------------------------------------------------------------------------
1485// Scan
1486// ---------------------------------------------------------------------------
1487
1488/// Scan a buffer for message boundaries.
1489///
1490/// Returns a `TgmScanResult` handle. Access entries with `tgm_scan_count`
1491/// and `tgm_scan_entry`. Free with `tgm_scan_free`.
1492#[unsafe(no_mangle)]
1493pub extern "C" fn tgm_scan(
1494    buf: *const u8,
1495    buf_len: usize,
1496    out: *mut *mut TgmScanResult,
1497) -> TgmError {
1498    if buf.is_null() || out.is_null() {
1499        set_last_error("null argument");
1500        return TgmError::InvalidArg;
1501    }
1502
1503    let data = unsafe { slice::from_raw_parts(buf, buf_len) };
1504    let offsets = scan(data);
1505    let entries: Vec<TgmScanEntry> = offsets
1506        .into_iter()
1507        .map(|(offset, length)| TgmScanEntry { offset, length })
1508        .collect();
1509    let result = Box::new(TgmScanResult { entries });
1510    unsafe {
1511        *out = Box::into_raw(result);
1512    }
1513    TgmError::Ok
1514}
1515
1516/// # Safety: caller must pass valid, non-null pointer from tgm_scan.
1517unsafe fn as_scan(result: *const TgmScanResult) -> Option<&'static TgmScanResult> {
1518    unsafe {
1519        if result.is_null() {
1520            None
1521        } else {
1522            Some(&*result)
1523        }
1524    }
1525}
1526
1527/// # Safety: caller must pass valid, non-null pointer from tgm_decode*.
1528unsafe fn as_msg(msg: *const TgmMessage) -> Option<&'static TgmMessage> {
1529    unsafe { if msg.is_null() { None } else { Some(&*msg) } }
1530}
1531
1532/// Returns the number of messages found by `tgm_scan`.
1533#[unsafe(no_mangle)]
1534pub extern "C" fn tgm_scan_count(result: *const TgmScanResult) -> usize {
1535    unsafe { as_scan(result).map(|r| r.entries.len()).unwrap_or(0) }
1536}
1537
1538#[unsafe(no_mangle)]
1539pub extern "C" fn tgm_scan_entry(result: *const TgmScanResult, index: usize) -> TgmScanEntry {
1540    let fallback = TgmScanEntry {
1541        offset: usize::MAX,
1542        length: 0,
1543    };
1544    unsafe {
1545        match as_scan(result) {
1546            Some(r) => match r.entries.get(index) {
1547                Some(entry) => *entry,
1548                None => {
1549                    set_last_error(&format!(
1550                        "scan entry index {} out of range (count={})",
1551                        index,
1552                        r.entries.len()
1553                    ));
1554                    fallback
1555                }
1556            },
1557            None => {
1558                set_last_error("null scan result handle");
1559                fallback
1560            }
1561        }
1562    }
1563}
1564
1565/// Free a scan result handle.
1566#[unsafe(no_mangle)]
1567pub extern "C" fn tgm_scan_free(result: *mut TgmScanResult) {
1568    if !result.is_null() {
1569        unsafe {
1570            drop(Box::from_raw(result));
1571        }
1572    }
1573}
1574
1575// ---------------------------------------------------------------------------
1576// Message accessors
1577// ---------------------------------------------------------------------------
1578
1579/// Returns the wire format version the decoder read from the preamble.
1580///
1581/// v3 decoders reject any other version at preamble parse time, so
1582/// this always returns [`tensogram::WIRE_VERSION`] for any live
1583/// `TgmMessage` handle.  The CBOR metadata frame carries no
1584/// `version` key of its own (see `plans/WIRE_FORMAT.md` §6.1).
1585/// Returns `0` for a null handle.
1586#[unsafe(no_mangle)]
1587pub extern "C" fn tgm_message_version(msg: *const TgmMessage) -> u64 {
1588    unsafe {
1589        as_msg(msg)
1590            .map(|_| tensogram::WIRE_VERSION as u64)
1591            .unwrap_or(0)
1592    }
1593}
1594
1595/// Returns the number of decoded objects in this message handle.
1596/// For `tgm_decode` this equals the total object count; for
1597/// `tgm_decode_object` this is always 1.
1598#[unsafe(no_mangle)]
1599pub extern "C" fn tgm_message_num_objects(msg: *const TgmMessage) -> usize {
1600    unsafe { as_msg(msg).map(|m| m.objects.len()).unwrap_or(0) }
1601}
1602
1603/// Returns the number of decoded payload buffers.
1604/// Equivalent to `tgm_message_num_objects` — kept for ABI compatibility.
1605#[unsafe(no_mangle)]
1606pub extern "C" fn tgm_message_num_decoded(msg: *const TgmMessage) -> usize {
1607    unsafe { as_msg(msg).map(|m| m.objects.len()).unwrap_or(0) }
1608}
1609
1610/// Returns the number of dimensions for object at index.
1611#[unsafe(no_mangle)]
1612pub extern "C" fn tgm_object_ndim(msg: *const TgmMessage, index: usize) -> u64 {
1613    unsafe {
1614        as_msg(msg)
1615            .and_then(|m| m.objects.get(index))
1616            .map(|(desc, _)| desc.ndim)
1617            .unwrap_or(0)
1618    }
1619}
1620
1621/// Returns a pointer to the shape array. Length is `tgm_object_ndim()`.
1622/// The pointer is valid until the message is freed.
1623#[unsafe(no_mangle)]
1624pub extern "C" fn tgm_object_shape(msg: *const TgmMessage, index: usize) -> *const u64 {
1625    unsafe {
1626        as_msg(msg)
1627            .and_then(|m| m.objects.get(index))
1628            .map(|(desc, _)| desc.shape.as_ptr())
1629            .unwrap_or(ptr::null())
1630    }
1631}
1632
1633/// Returns a pointer to the strides array. Length is `tgm_object_ndim()`.
1634#[unsafe(no_mangle)]
1635pub extern "C" fn tgm_object_strides(msg: *const TgmMessage, index: usize) -> *const u64 {
1636    unsafe {
1637        as_msg(msg)
1638            .and_then(|m| m.objects.get(index))
1639            .map(|(desc, _)| desc.strides.as_ptr())
1640            .unwrap_or(ptr::null())
1641    }
1642}
1643
1644/// Returns the dtype as a null-terminated string (e.g. "float32").
1645/// The pointer is valid until the message is freed.
1646#[unsafe(no_mangle)]
1647pub extern "C" fn tgm_object_dtype(msg: *const TgmMessage, index: usize) -> *const c_char {
1648    unsafe {
1649        as_msg(msg)
1650            .and_then(|m| m.dtype_strings.get(index))
1651            .map(|s| s.as_ptr())
1652            .unwrap_or(ptr::null())
1653    }
1654}
1655
1656/// Returns a pointer to the decoded payload bytes for a decoded object.
1657/// `decoded_index` is the index into the decoded objects array (0 for the
1658/// first decoded object, regardless of the original object index).
1659/// `out_len` receives the byte length.
1660#[unsafe(no_mangle)]
1661pub extern "C" fn tgm_object_data(
1662    msg: *const TgmMessage,
1663    decoded_index: usize,
1664    out_len: *mut usize,
1665) -> *const u8 {
1666    unsafe {
1667        match as_msg(msg).and_then(|m| m.objects.get(decoded_index)) {
1668            Some((_, data)) => {
1669                if !out_len.is_null() {
1670                    *out_len = data.len();
1671                }
1672                data.as_ptr()
1673            }
1674            None => {
1675                if !out_len.is_null() {
1676                    *out_len = 0;
1677                }
1678                ptr::null()
1679            }
1680        }
1681    }
1682}
1683
1684/// Returns the encoding string for a data object descriptor (e.g. "none", "simple_packing").
1685/// The pointer is valid until the message is freed.
1686#[unsafe(no_mangle)]
1687pub extern "C" fn tgm_payload_encoding(msg: *const TgmMessage, index: usize) -> *const c_char {
1688    unsafe {
1689        as_msg(msg)
1690            .and_then(|m| m.encoding_strings.get(index))
1691            .map(|s| s.as_ptr())
1692            .unwrap_or(ptr::null())
1693    }
1694}
1695
1696/// Returns 1 if the i-th data object has a populated inline hash
1697/// slot, 0 otherwise.
1698///
1699/// In v3 the per-object hash lives in the frame footer's inline
1700/// slot (see `plans/WIRE_FORMAT.md` §2.4) rather than the CBOR
1701/// descriptor.  v3 hashing is a message-wide toggle: either every
1702/// frame's slot is populated (preamble flag `HASHES_PRESENT = 1`)
1703/// or every slot is zero (`HASHES_PRESENT = 0`).  This accessor
1704/// returns 1 when the i-th slot holds a non-zero xxh3-64 digest,
1705/// and 0 when the slot is zero (most commonly the whole-message
1706/// `HASHES_PRESENT = 0` case) or when the index is out of range.
1707///
1708/// A zero slot on a message that advertises `HASHES_PRESENT = 1`
1709/// is a structural anomaly (tamper or writer bug) — surface via
1710/// `tgm_validate` at the `checksum` / `integrity` level, which
1711/// will report a HashMismatch against the body's recomputed digest.
1712///
1713/// The matching hex digest is available via
1714/// [`tgm_object_hash_value`]; the algorithm tag (always
1715/// `"xxh3"` in v3) via [`tgm_object_hash_type`].
1716#[unsafe(no_mangle)]
1717pub extern "C" fn tgm_payload_has_hash(msg: *const TgmMessage, index: usize) -> i32 {
1718    unsafe {
1719        as_msg(msg)
1720            .and_then(|m| m.hash_value_strings.get(index))
1721            .map(|opt| opt.is_some() as i32)
1722            .unwrap_or(0)
1723    }
1724}
1725
1726/// Extract a metadata handle from a decoded message.
1727/// The metadata handle is independent — free it separately with `tgm_metadata_free`.
1728#[unsafe(no_mangle)]
1729pub extern "C" fn tgm_message_metadata(
1730    msg: *const TgmMessage,
1731    out: *mut *mut TgmMetadata,
1732) -> TgmError {
1733    if msg.is_null() || out.is_null() {
1734        set_last_error("null argument");
1735        return TgmError::InvalidArg;
1736    }
1737    let m = unsafe { &*msg };
1738    let meta = Box::new(TgmMetadata {
1739        global_metadata: m.global_metadata.clone(),
1740        cache: std::cell::RefCell::new(BTreeMap::new()),
1741    });
1742    unsafe {
1743        *out = Box::into_raw(meta);
1744    }
1745    TgmError::Ok
1746}
1747
1748/// Returns the object type string (e.g. "ndarray"). Valid until message freed.
1749#[unsafe(no_mangle)]
1750pub extern "C" fn tgm_object_type(msg: *const TgmMessage, index: usize) -> *const c_char {
1751    unsafe {
1752        as_msg(msg)
1753            .and_then(|m| m.type_strings.get(index))
1754            .map(|s| s.as_ptr())
1755            .unwrap_or(ptr::null())
1756    }
1757}
1758
1759/// Returns the byte order string ("big" or "little"). Valid until message freed.
1760#[unsafe(no_mangle)]
1761pub extern "C" fn tgm_object_byte_order(msg: *const TgmMessage, index: usize) -> *const c_char {
1762    unsafe {
1763        as_msg(msg)
1764            .and_then(|m| m.byte_order_strings.get(index))
1765            .map(|s| s.as_ptr())
1766            .unwrap_or(ptr::null())
1767    }
1768}
1769
1770/// Returns the filter string (e.g. "none", "shuffle"). Valid until message freed.
1771#[unsafe(no_mangle)]
1772pub extern "C" fn tgm_object_filter(msg: *const TgmMessage, index: usize) -> *const c_char {
1773    unsafe {
1774        as_msg(msg)
1775            .and_then(|m| m.filter_strings.get(index))
1776            .map(|s| s.as_ptr())
1777            .unwrap_or(ptr::null())
1778    }
1779}
1780
1781/// Returns the compression string (e.g. "none", "zstd"). Valid until message freed.
1782#[unsafe(no_mangle)]
1783pub extern "C" fn tgm_object_compression(msg: *const TgmMessage, index: usize) -> *const c_char {
1784    unsafe {
1785        as_msg(msg)
1786            .and_then(|m| m.compression_strings.get(index))
1787            .map(|s| s.as_ptr())
1788            .unwrap_or(ptr::null())
1789    }
1790}
1791
1792/// Returns the hash type string ("xxh3") or NULL if no hash. Valid until message freed.
1793#[unsafe(no_mangle)]
1794pub extern "C" fn tgm_object_hash_type(msg: *const TgmMessage, index: usize) -> *const c_char {
1795    unsafe {
1796        as_msg(msg)
1797            .and_then(|m| m.hash_type_strings.get(index))
1798            .and_then(|opt| opt.as_ref())
1799            .map(|s| s.as_ptr())
1800            .unwrap_or(ptr::null())
1801    }
1802}
1803
1804/// Returns the hash value hex string or NULL if no hash. Valid until message freed.
1805#[unsafe(no_mangle)]
1806pub extern "C" fn tgm_object_hash_value(msg: *const TgmMessage, index: usize) -> *const c_char {
1807    unsafe {
1808        as_msg(msg)
1809            .and_then(|m| m.hash_value_strings.get(index))
1810            .and_then(|opt| opt.as_ref())
1811            .map(|s| s.as_ptr())
1812            .unwrap_or(ptr::null())
1813    }
1814}
1815
1816/// Free a decoded message handle.
1817#[unsafe(no_mangle)]
1818pub extern "C" fn tgm_message_free(msg: *mut TgmMessage) {
1819    if !msg.is_null() {
1820        unsafe {
1821            drop(Box::from_raw(msg));
1822        }
1823    }
1824}
1825
1826// ---------------------------------------------------------------------------
1827// Metadata accessors
1828// ---------------------------------------------------------------------------
1829
1830/// Returns the wire format version.
1831///
1832/// Sourced from [`tensogram::WIRE_VERSION`] since v3 decoders reject any
1833/// other version at preamble parse time.  The CBOR metadata frame
1834/// carries no `version` key of its own (see
1835/// `plans/WIRE_FORMAT.md` §6.1).  Returns `0` for a null handle.
1836#[unsafe(no_mangle)]
1837pub extern "C" fn tgm_metadata_version(meta: *const TgmMetadata) -> u64 {
1838    if meta.is_null() {
1839        return 0;
1840    }
1841    tensogram::WIRE_VERSION as u64
1842}
1843
1844/// Returns the number of objects described in the global metadata.
1845///
1846/// Returns the length of the `base` array, which has one entry per data object.
1847#[unsafe(no_mangle)]
1848pub extern "C" fn tgm_metadata_num_objects(meta: *const TgmMetadata) -> usize {
1849    if meta.is_null() {
1850        return 0;
1851    }
1852    unsafe { (*meta).global_metadata.base.len() }
1853}
1854
1855/// Look up a string value by dot-notation key (e.g. "mars.class").
1856/// Returns NULL if the key is not found or is not a string.
1857/// The pointer is valid until the metadata handle is freed.
1858#[unsafe(no_mangle)]
1859pub extern "C" fn tgm_metadata_get_string(
1860    meta: *const TgmMetadata,
1861    key: *const c_char,
1862) -> *const c_char {
1863    if meta.is_null() || key.is_null() {
1864        return ptr::null();
1865    }
1866
1867    let key_str = match unsafe { CStr::from_ptr(key) }.to_str() {
1868        Ok(s) => s,
1869        Err(_) => return ptr::null(),
1870    };
1871
1872    let m = unsafe { &(*meta) };
1873    let value = lookup_string_key(&m.global_metadata, key_str);
1874
1875    match value {
1876        Some(s) => {
1877            let mut cache = m.cache.borrow_mut();
1878            let entry = cache
1879                .entry(key_str.to_string())
1880                .or_insert_with(|| CString::new(s.clone()).unwrap_or_default());
1881            entry.as_ptr()
1882        }
1883        None => ptr::null(),
1884    }
1885}
1886
1887/// Look up an integer value by dot-notation key.
1888/// Returns `default_val` if the key is not found or is not an integer.
1889#[unsafe(no_mangle)]
1890pub extern "C" fn tgm_metadata_get_int(
1891    meta: *const TgmMetadata,
1892    key: *const c_char,
1893    default_val: i64,
1894) -> i64 {
1895    if meta.is_null() || key.is_null() {
1896        return default_val;
1897    }
1898
1899    let key_str = match unsafe { CStr::from_ptr(key) }.to_str() {
1900        Ok(s) => s,
1901        Err(_) => return default_val,
1902    };
1903
1904    let m = unsafe { &(*meta) };
1905    lookup_int_key(&m.global_metadata, key_str).unwrap_or(default_val)
1906}
1907
1908/// Look up a float value by dot-notation key.
1909#[unsafe(no_mangle)]
1910pub extern "C" fn tgm_metadata_get_float(
1911    meta: *const TgmMetadata,
1912    key: *const c_char,
1913    default_val: f64,
1914) -> f64 {
1915    if meta.is_null() || key.is_null() {
1916        return default_val;
1917    }
1918
1919    let key_str = match unsafe { CStr::from_ptr(key) }.to_str() {
1920        Ok(s) => s,
1921        Err(_) => return default_val,
1922    };
1923
1924    let m = unsafe { &(*meta) };
1925    lookup_float_key(&m.global_metadata, key_str).unwrap_or(default_val)
1926}
1927
1928/// Free a metadata handle.
1929#[unsafe(no_mangle)]
1930pub extern "C" fn tgm_metadata_free(meta: *mut TgmMetadata) {
1931    if !meta.is_null() {
1932        unsafe {
1933            drop(Box::from_raw(meta));
1934        }
1935    }
1936}
1937
1938// ---------------------------------------------------------------------------
1939// File API
1940// ---------------------------------------------------------------------------
1941
1942/// Open an existing Tensogram file for reading.
1943#[unsafe(no_mangle)]
1944pub extern "C" fn tgm_file_open(path: *const c_char, out: *mut *mut TgmFile) -> TgmError {
1945    if path.is_null() || out.is_null() {
1946        set_last_error("null argument");
1947        return TgmError::InvalidArg;
1948    }
1949
1950    let path_str = match unsafe { CStr::from_ptr(path) }.to_str() {
1951        Ok(s) => s,
1952        Err(e) => {
1953            set_last_error(&format!("invalid UTF-8 in path: {e}"));
1954            return TgmError::InvalidArg;
1955        }
1956    };
1957
1958    match TensogramFile::open(path_str) {
1959        Ok(file) => {
1960            let path_string = CString::new(path_str).unwrap_or_default();
1961            let handle = Box::new(TgmFile { file, path_string });
1962            unsafe {
1963                *out = Box::into_raw(handle);
1964            }
1965            TgmError::Ok
1966        }
1967        Err(e) => {
1968            set_last_error(&e.to_string());
1969            to_error_code(&e)
1970        }
1971    }
1972}
1973
1974/// Create a new Tensogram file for writing.
1975#[unsafe(no_mangle)]
1976pub extern "C" fn tgm_file_create(path: *const c_char, out: *mut *mut TgmFile) -> TgmError {
1977    if path.is_null() || out.is_null() {
1978        set_last_error("null argument");
1979        return TgmError::InvalidArg;
1980    }
1981
1982    let path_str = match unsafe { CStr::from_ptr(path) }.to_str() {
1983        Ok(s) => s,
1984        Err(e) => {
1985            set_last_error(&format!("invalid UTF-8 in path: {e}"));
1986            return TgmError::InvalidArg;
1987        }
1988    };
1989
1990    match TensogramFile::create(path_str) {
1991        Ok(file) => {
1992            let path_string = CString::new(path_str).unwrap_or_default();
1993            let handle = Box::new(TgmFile { file, path_string });
1994            unsafe {
1995                *out = Box::into_raw(handle);
1996            }
1997            TgmError::Ok
1998        }
1999        Err(e) => {
2000            set_last_error(&e.to_string());
2001            to_error_code(&e)
2002        }
2003    }
2004}
2005
2006/// Count messages in the file (may trigger lazy scan).
2007#[unsafe(no_mangle)]
2008pub extern "C" fn tgm_file_message_count(file: *mut TgmFile, out_count: *mut usize) -> TgmError {
2009    if file.is_null() || out_count.is_null() {
2010        set_last_error("null argument");
2011        return TgmError::InvalidArg;
2012    }
2013
2014    let f = unsafe { &(*file).file };
2015    match f.message_count() {
2016        Ok(count) => {
2017            unsafe {
2018                *out_count = count;
2019            }
2020            TgmError::Ok
2021        }
2022        Err(e) => {
2023            set_last_error(&e.to_string());
2024            to_error_code(&e)
2025        }
2026    }
2027}
2028
2029/// Decode message at `index` from the file.
2030/// On success fills `out` with a `TgmMessage` handle.
2031#[unsafe(no_mangle)]
2032pub extern "C" fn tgm_file_decode_message(
2033    file: *mut TgmFile,
2034    index: usize,
2035    native_byte_order: i32,
2036    threads: u32,
2037    verify_hash: i32,
2038    out: *mut *mut TgmMessage,
2039) -> TgmError {
2040    if file.is_null() || out.is_null() {
2041        set_last_error("null argument");
2042        return TgmError::InvalidArg;
2043    }
2044
2045    let f = unsafe { &(*file).file };
2046    let options = DecodeOptions {
2047        native_byte_order: native_byte_order != 0,
2048        threads,
2049        verify_hash: verify_hash != 0,
2050        ..Default::default()
2051    };
2052
2053    match f.decode_message(index, &options) {
2054        Ok((global_metadata, objects)) => {
2055            // Inline hashes: re-read the raw message bytes and run
2056            // them through the cheap frame-header walker.  This
2057            // costs one extra message read (typically memory-
2058            // mapped) but gives FFI file-path callers parity with
2059            // the buffer path's hash accessors.  Silent fallback
2060            // to empty on read error — `decode_message` above
2061            // already succeeded so this branch is defensive.
2062            let inline_hashes = f
2063                .read_message(index)
2064                .ok()
2065                .and_then(|bytes| tensogram::framing::data_object_inline_hashes(&bytes).ok())
2066                .unwrap_or_default();
2067            let caches = build_message_caches(&objects, &inline_hashes);
2068            let msg = Box::new(TgmMessage {
2069                global_metadata,
2070                objects,
2071                dtype_strings: caches.dtype_strings,
2072                type_strings: caches.type_strings,
2073                byte_order_strings: caches.byte_order_strings,
2074                filter_strings: caches.filter_strings,
2075                compression_strings: caches.compression_strings,
2076                encoding_strings: caches.encoding_strings,
2077                hash_type_strings: caches.hash_type_strings,
2078                hash_value_strings: caches.hash_value_strings,
2079            });
2080            unsafe {
2081                *out = Box::into_raw(msg);
2082            }
2083            TgmError::Ok
2084        }
2085        Err(e) => {
2086            set_last_error(&e.to_string());
2087            to_error_code(&e)
2088        }
2089    }
2090}
2091
2092/// Read raw message bytes at `index`.
2093/// On success fills `out` with a `TgmBytes` buffer.
2094#[unsafe(no_mangle)]
2095pub extern "C" fn tgm_file_read_message(
2096    file: *mut TgmFile,
2097    index: usize,
2098    out: *mut TgmBytes,
2099) -> TgmError {
2100    if file.is_null() || out.is_null() {
2101        set_last_error("null argument");
2102        return TgmError::InvalidArg;
2103    }
2104
2105    let f = unsafe { &(*file).file };
2106
2107    match f.read_message(index) {
2108        Ok(bytes) => {
2109            // Rebuild via boxed slice to guarantee capacity == len for tgm_bytes_free.
2110            let mut bytes = bytes.into_boxed_slice().into_vec();
2111            let result = TgmBytes {
2112                data: bytes.as_mut_ptr(),
2113                len: bytes.len(),
2114            };
2115            std::mem::forget(bytes);
2116            unsafe {
2117                *out = result;
2118            }
2119            TgmError::Ok
2120        }
2121        Err(e) => {
2122            set_last_error(&e.to_string());
2123            to_error_code(&e)
2124        }
2125    }
2126}
2127
2128/// Append raw message bytes to the file.
2129#[unsafe(no_mangle)]
2130pub extern "C" fn tgm_file_append_raw(
2131    file: *mut TgmFile,
2132    buf: *const u8,
2133    buf_len: usize,
2134) -> TgmError {
2135    if file.is_null() || buf.is_null() {
2136        set_last_error("null argument");
2137        return TgmError::InvalidArg;
2138    }
2139
2140    let data = unsafe { slice::from_raw_parts(buf, buf_len) };
2141    let f = unsafe { &mut (*file).file };
2142
2143    // Write raw bytes using std::fs
2144    use std::io::Write;
2145    let path = match f.path() {
2146        Some(p) => p.to_path_buf(),
2147        None => {
2148            set_last_error("append_raw not supported on remote files");
2149            return TgmError::Remote;
2150        }
2151    };
2152    let result = std::fs::OpenOptions::new()
2153        .create(true)
2154        .append(true)
2155        .open(&path)
2156        .and_then(|mut fh| fh.write_all(data));
2157
2158    match result {
2159        Ok(()) => {
2160            f.invalidate_offsets();
2161            TgmError::Ok
2162        }
2163        Err(e) => {
2164            set_last_error(&e.to_string());
2165            TgmError::Io
2166        }
2167    }
2168}
2169
2170/// Returns the file path as a null-terminated string.
2171/// The pointer is valid until the file handle is closed.
2172#[unsafe(no_mangle)]
2173pub extern "C" fn tgm_file_path(file: *const TgmFile) -> *const c_char {
2174    if file.is_null() {
2175        return ptr::null();
2176    }
2177    unsafe { (*file).path_string.as_ptr() }
2178}
2179
2180/// Encode and append a message to the file.
2181/// Same JSON schema as `tgm_encode` for `metadata_json`.
2182///
2183/// Non-finite-value rejection is on by default in 0.17+; see `tgm_encode`.
2184#[unsafe(no_mangle)]
2185pub extern "C" fn tgm_file_append(
2186    file: *mut TgmFile,
2187    metadata_json: *const c_char,
2188    data_ptrs: *const *const u8,
2189    data_lens: *const usize,
2190    num_objects: usize,
2191    hash_algo: *const c_char,
2192    threads: u32,
2193) -> TgmError {
2194    if file.is_null() || metadata_json.is_null() {
2195        set_last_error("null argument");
2196        return TgmError::InvalidArg;
2197    }
2198
2199    let json_str = match unsafe { CStr::from_ptr(metadata_json) }.to_str() {
2200        Ok(s) => s,
2201        Err(e) => {
2202            set_last_error(&format!("invalid UTF-8 in metadata_json: {e}"));
2203            return TgmError::InvalidArg;
2204        }
2205    };
2206
2207    let parsed = match unsafe {
2208        parse_encode_args(
2209            json_str,
2210            data_ptrs,
2211            data_lens,
2212            num_objects,
2213            hash_algo,
2214            threads,
2215        )
2216    } {
2217        Ok(p) => p,
2218        Err((code, msg)) => {
2219            set_last_error(&msg);
2220            return code;
2221        }
2222    };
2223
2224    let pairs: Vec<(&DataObjectDescriptor, &[u8])> = parsed
2225        .descriptors
2226        .iter()
2227        .zip(parsed.data_slices.iter())
2228        .map(|(d, s)| (d, *s))
2229        .collect();
2230
2231    let f = unsafe { &mut (*file).file };
2232    match f.append(&parsed.global_metadata, &pairs, &parsed.options) {
2233        Ok(()) => TgmError::Ok,
2234        Err(e) => {
2235            set_last_error(&e.to_string());
2236            to_error_code(&e)
2237        }
2238    }
2239}
2240
2241/// Close a file handle and release resources.
2242#[unsafe(no_mangle)]
2243pub extern "C" fn tgm_file_close(file: *mut TgmFile) {
2244    if !file.is_null() {
2245        unsafe {
2246            drop(Box::from_raw(file));
2247        }
2248    }
2249}
2250
2251// ---------------------------------------------------------------------------
2252// Metadata key lookup helpers
2253// ---------------------------------------------------------------------------
2254
2255/// Look up a CBOR value by dot-notation key with arbitrary nesting depth.
2256///
2257/// Supports `"key"`, `"ns.field"`, `"grib.geography.Ni"`, etc.
2258/// Search order: `base[i]` (skip `_reserved_`, first match) → `extra`.
2259fn lookup_cbor_value<'a>(
2260    global_metadata: &'a GlobalMetadata,
2261    key: &str,
2262) -> Option<&'a ciborium::Value> {
2263    if key.is_empty() {
2264        return None;
2265    }
2266    let parts: Vec<&str> = key.split('.').collect();
2267
2268    if parts.is_empty() || parts[0].is_empty() {
2269        return None;
2270    }
2271    if parts[0] == "version" {
2272        return None; // use tgm_metadata_version instead
2273    }
2274
2275    // Explicit _extra_ or extra prefix targets the extra map directly
2276    if parts[0] == "_extra_" || parts[0] == "extra" {
2277        if parts.len() > 1 {
2278            return resolve_in_btree(&global_metadata.extra, &parts[1..]);
2279        }
2280        return None;
2281    }
2282
2283    // Search base entries (skip _reserved_ key within each entry)
2284    for entry in &global_metadata.base {
2285        if let Some(val) = resolve_in_btree_skip_reserved(entry, &parts) {
2286            return Some(val);
2287        }
2288    }
2289    // Fall back to extra
2290    resolve_in_btree(&global_metadata.extra, &parts)
2291}
2292
2293/// Walk a dot-path in a BTreeMap, skipping `_reserved_` keys at the first level.
2294fn resolve_in_btree_skip_reserved<'a>(
2295    map: &'a BTreeMap<String, ciborium::Value>,
2296    parts: &[&str],
2297) -> Option<&'a ciborium::Value> {
2298    let (first, rest) = parts.split_first()?;
2299    if *first == RESERVED_KEY {
2300        return None;
2301    }
2302    let value = map.get(*first)?;
2303    resolve_cbor_path(value, rest)
2304}
2305
2306/// Walk a dot-path in a BTreeMap (no _reserved_ filtering).
2307fn resolve_in_btree<'a>(
2308    map: &'a BTreeMap<String, ciborium::Value>,
2309    parts: &[&str],
2310) -> Option<&'a ciborium::Value> {
2311    let (first, rest) = parts.split_first()?;
2312    let value = map.get(*first)?;
2313    resolve_cbor_path(value, rest)
2314}
2315
2316/// Recursively walk remaining path segments into a CBOR value.
2317///
2318/// When no segments remain, returns the current value.
2319/// When segments remain, the current value must be a `Map` to navigate further.
2320fn resolve_cbor_path<'a>(
2321    value: &'a ciborium::Value,
2322    remaining: &[&str],
2323) -> Option<&'a ciborium::Value> {
2324    if remaining.is_empty() {
2325        return Some(value);
2326    }
2327    if let ciborium::Value::Map(entries) = value {
2328        for (k, v) in entries {
2329            if matches!(k, ciborium::Value::Text(s) if s == remaining[0]) {
2330                return resolve_cbor_path(v, &remaining[1..]);
2331            }
2332        }
2333    }
2334    None
2335}
2336
2337fn lookup_string_key(global_metadata: &GlobalMetadata, key: &str) -> Option<String> {
2338    if key.is_empty() {
2339        return None;
2340    }
2341    // `version` is a pseudo-key — the wire-format version lives in the
2342    // preamble (see `plans/WIRE_FORMAT.md` §3), not in the CBOR metadata
2343    // frame.  Return the constant so FFI tooling that queries the key
2344    // keeps seeing `"3"`.
2345    if key == "version" {
2346        return Some(tensogram::WIRE_VERSION.to_string());
2347    }
2348
2349    lookup_cbor_value(global_metadata, key).and_then(|v| match v {
2350        ciborium::Value::Text(s) => Some(s.clone()),
2351        ciborium::Value::Integer(i) => {
2352            let n: i128 = (*i).into();
2353            Some(n.to_string())
2354        }
2355        ciborium::Value::Float(f) => Some(f.to_string()),
2356        ciborium::Value::Bool(b) => Some(b.to_string()),
2357        _ => None,
2358    })
2359}
2360
2361fn lookup_int_key(global_metadata: &GlobalMetadata, key: &str) -> Option<i64> {
2362    // `version` pseudo-key — see `lookup_string_key` for rationale.
2363    if key == "version" {
2364        return Some(tensogram::WIRE_VERSION as i64);
2365    }
2366
2367    lookup_cbor_value(global_metadata, key).and_then(|v| match v {
2368        ciborium::Value::Integer(i) => {
2369            let n: i128 = (*i).into();
2370            i64::try_from(n).ok()
2371        }
2372        _ => None,
2373    })
2374}
2375
2376fn lookup_float_key(global_metadata: &GlobalMetadata, key: &str) -> Option<f64> {
2377    lookup_cbor_value(global_metadata, key).and_then(|v| match v {
2378        ciborium::Value::Float(f) => Some(*f),
2379        ciborium::Value::Integer(i) => {
2380            let n: i128 = (*i).into();
2381            // i128 → f64 may lose precision for very large integers, but this
2382            // is the expected behavior for a float accessor on an integer value.
2383            Some(n as f64)
2384        }
2385        _ => None,
2386    })
2387}
2388
2389// ---------------------------------------------------------------------------
2390// simple_packing direct access
2391// ---------------------------------------------------------------------------
2392
2393/// Compute simple_packing parameters for a set of f64 values.
2394///
2395/// Returns TgmError::Ok on success, filling the out-params.
2396/// Returns Encoding error if data contains NaN.
2397#[unsafe(no_mangle)]
2398pub extern "C" fn tgm_simple_packing_compute_params(
2399    values: *const f64,
2400    num_values: usize,
2401    bits_per_value: u32,
2402    decimal_scale_factor: i32,
2403    out_reference_value: *mut f64,
2404    out_binary_scale_factor: *mut i32,
2405) -> TgmError {
2406    if values.is_null() || out_reference_value.is_null() || out_binary_scale_factor.is_null() {
2407        set_last_error("null argument");
2408        return TgmError::InvalidArg;
2409    }
2410
2411    let vals = unsafe { slice::from_raw_parts(values, num_values) };
2412
2413    match tensogram_encodings::simple_packing::compute_params(
2414        vals,
2415        bits_per_value,
2416        decimal_scale_factor,
2417    ) {
2418        Ok(params) => {
2419            unsafe {
2420                *out_reference_value = params.reference_value;
2421                *out_binary_scale_factor = params.binary_scale_factor;
2422            }
2423            TgmError::Ok
2424        }
2425        Err(e) => {
2426            set_last_error(&e.to_string());
2427            TgmError::Encoding
2428        }
2429    }
2430}
2431
2432// ---------------------------------------------------------------------------
2433// Iterator API
2434// ---------------------------------------------------------------------------
2435
/// Opaque handle for iterating over the messages contained in a byte buffer.
///
/// The iterator does not copy the buffer: the caller's buffer must remain
/// valid for the lifetime of this iterator.
pub struct TgmBufferIter {
    /// `(offset, length)` of each message, as produced by scanning the buffer.
    offsets: Vec<(usize, usize)>,
    /// Start of the caller-owned buffer that the offsets refer to.
    buf_ptr: *const u8,
    /// Index into `offsets` of the next message to hand out.
    pos: usize,
}
2444
2445/// Create a buffer message iterator.
2446///
2447/// Scans `buf` once and stores message boundaries. The buffer must remain
2448/// valid and unmodified until `tgm_buffer_iter_free` is called.
2449#[unsafe(no_mangle)]
2450pub extern "C" fn tgm_buffer_iter_create(
2451    buf: *const u8,
2452    buf_len: usize,
2453    out: *mut *mut TgmBufferIter,
2454) -> TgmError {
2455    if buf.is_null() || out.is_null() {
2456        set_last_error("null argument");
2457        return TgmError::InvalidArg;
2458    }
2459    let data = unsafe { slice::from_raw_parts(buf, buf_len) };
2460    let offsets = scan(data);
2461    let iter = Box::new(TgmBufferIter {
2462        offsets,
2463        buf_ptr: buf,
2464        pos: 0,
2465    });
2466    unsafe {
2467        *out = Box::into_raw(iter);
2468    }
2469    TgmError::Ok
2470}
2471
2472/// Return the total number of messages in the buffer iterator.
2473#[unsafe(no_mangle)]
2474pub extern "C" fn tgm_buffer_iter_count(iter: *const TgmBufferIter) -> usize {
2475    if iter.is_null() {
2476        return 0;
2477    }
2478    unsafe { (*iter).offsets.len() }
2479}
2480
2481/// Advance the buffer iterator. On success, sets `out_buf` and `out_len` to
2482/// the next message slice (borrowed from the original buffer).
2483///
2484/// Returns `TgmError::Ok` if a message is available, `TgmError::EndOfIter`
2485/// when iteration is exhausted.
2486#[unsafe(no_mangle)]
2487pub extern "C" fn tgm_buffer_iter_next(
2488    iter: *mut TgmBufferIter,
2489    out_buf: *mut *const u8,
2490    out_len: *mut usize,
2491) -> TgmError {
2492    if iter.is_null() || out_buf.is_null() || out_len.is_null() {
2493        set_last_error("null argument");
2494        return TgmError::InvalidArg;
2495    }
2496    let it = unsafe { &mut *iter };
2497    if it.pos >= it.offsets.len() {
2498        return TgmError::EndOfIter;
2499    }
2500    let (offset, length) = it.offsets[it.pos];
2501    it.pos += 1;
2502    unsafe {
2503        *out_buf = it.buf_ptr.add(offset);
2504        *out_len = length;
2505    }
2506    TgmError::Ok
2507}
2508
2509/// Free a buffer iterator handle.
2510#[unsafe(no_mangle)]
2511pub extern "C" fn tgm_buffer_iter_free(iter: *mut TgmBufferIter) {
2512    if !iter.is_null() {
2513        unsafe {
2514            drop(Box::from_raw(iter));
2515        }
2516    }
2517}
2518
2519/// Opaque handle for iterating over messages in a file.
2520pub struct TgmFileIter {
2521    inner: tensogram::FileMessageIter,
2522}
2523
2524/// Create a file message iterator from an open TgmFile.
2525///
2526/// Scans the file to locate message boundaries. The file handle remains
2527/// usable after this call.
2528#[unsafe(no_mangle)]
2529pub extern "C" fn tgm_file_iter_create(file: *mut TgmFile, out: *mut *mut TgmFileIter) -> TgmError {
2530    if file.is_null() || out.is_null() {
2531        set_last_error("null argument");
2532        return TgmError::InvalidArg;
2533    }
2534    let f = unsafe { &(*file).file };
2535    match f.iter() {
2536        Ok(inner) => {
2537            let iter = Box::new(TgmFileIter { inner });
2538            unsafe {
2539                *out = Box::into_raw(iter);
2540            }
2541            TgmError::Ok
2542        }
2543        Err(e) => {
2544            set_last_error(&e.to_string());
2545            to_error_code(&e)
2546        }
2547    }
2548}
2549
2550/// Advance the file iterator. On success, fills `out` with a `TgmBytes`
2551/// buffer containing the raw message bytes (caller owns, free with
2552/// `tgm_bytes_free`).
2553///
2554/// Returns `TgmError::Ok` when a message is available, `TgmError::EndOfIter`
2555/// when iteration is exhausted.
2556#[unsafe(no_mangle)]
2557pub extern "C" fn tgm_file_iter_next(iter: *mut TgmFileIter, out: *mut TgmBytes) -> TgmError {
2558    if iter.is_null() || out.is_null() {
2559        set_last_error("null argument");
2560        return TgmError::InvalidArg;
2561    }
2562    let it = unsafe { &mut (*iter).inner };
2563    match it.next() {
2564        None => TgmError::EndOfIter,
2565        Some(Err(e)) => {
2566            set_last_error(&e.to_string());
2567            to_error_code(&e)
2568        }
2569        Some(Ok(bytes)) => {
2570            // Rebuild via boxed slice to guarantee capacity == len for tgm_bytes_free.
2571            let mut bytes = bytes.into_boxed_slice().into_vec();
2572            let result = TgmBytes {
2573                data: bytes.as_mut_ptr(),
2574                len: bytes.len(),
2575            };
2576            std::mem::forget(bytes);
2577            unsafe {
2578                *out = result;
2579            }
2580            TgmError::Ok
2581        }
2582    }
2583}
2584
2585/// Free a file iterator handle.
2586#[unsafe(no_mangle)]
2587pub extern "C" fn tgm_file_iter_free(iter: *mut TgmFileIter) {
2588    if !iter.is_null() {
2589        unsafe {
2590            drop(Box::from_raw(iter));
2591        }
2592    }
2593}
2594
/// Opaque handle for iterating over objects within a single message.
///
/// Created by `tgm_object_iter_create`, advanced by `tgm_object_iter_next`
/// and released by `tgm_object_iter_free`.
pub struct TgmObjectIter {
    /// Underlying object iterator; each `tgm_object_iter_next` call pulls
    /// one item from it.
    inner: tensogram::ObjectIter,
    /// Global metadata parsed from the message header, cloned into each
    /// yielded `TgmMessage` to preserve the original version and extra fields.
    /// `tgm_object_iter_create` stores `GlobalMetadata::default()` here when
    /// `decode_metadata` fails.
    global_metadata: GlobalMetadata,
}
2602
2603/// Create an object iterator from raw message bytes.
2604///
2605/// Parses metadata once, then decodes each object on demand when
2606/// `tgm_object_iter_next` is called. The global metadata from the
2607/// original message is preserved in each yielded `TgmMessage`.
2608#[unsafe(no_mangle)]
2609pub extern "C" fn tgm_object_iter_create(
2610    buf: *const u8,
2611    buf_len: usize,
2612    native_byte_order: i32,
2613    verify_hash: i32,
2614    out: *mut *mut TgmObjectIter,
2615) -> TgmError {
2616    if buf.is_null() || out.is_null() {
2617        set_last_error("null argument");
2618        return TgmError::InvalidArg;
2619    }
2620    let data = unsafe { slice::from_raw_parts(buf, buf_len) };
2621    let options = DecodeOptions {
2622        native_byte_order: native_byte_order != 0,
2623        verify_hash: verify_hash != 0,
2624        ..Default::default()
2625    };
2626
2627    // Parse global metadata from the message header so we can attach it to
2628    // each yielded TgmMessage instead of fabricating a default.
2629    let global_metadata = decode_metadata(data).unwrap_or_default();
2630
2631    match tensogram::objects(data, options) {
2632        Ok(inner) => {
2633            let iter = Box::new(TgmObjectIter {
2634                inner,
2635                global_metadata,
2636            });
2637            unsafe {
2638                *out = Box::into_raw(iter);
2639            }
2640            TgmError::Ok
2641        }
2642        Err(e) => {
2643            set_last_error(&e.to_string());
2644            to_error_code(&e)
2645        }
2646    }
2647}
2648
2649/// Advance the object iterator. On success, fills `out` with a `TgmMessage`
2650/// handle containing exactly one decoded object (the next in sequence).
2651///
2652/// Returns `TgmError::Ok` when an object is available, `TgmError::EndOfIter`
2653/// when iteration is exhausted. Free each yielded `TgmMessage` with
2654/// `tgm_message_free`.
2655#[unsafe(no_mangle)]
2656pub extern "C" fn tgm_object_iter_next(
2657    iter: *mut TgmObjectIter,
2658    out: *mut *mut TgmMessage,
2659) -> TgmError {
2660    if iter.is_null() || out.is_null() {
2661        set_last_error("null argument");
2662        return TgmError::InvalidArg;
2663    }
2664    let it = unsafe { &mut *iter };
2665    match it.inner.next() {
2666        None => TgmError::EndOfIter,
2667        Some(Err(e)) => {
2668            set_last_error(&e.to_string());
2669            to_error_code(&e)
2670        }
2671        Some(Ok((descriptor, data))) => {
2672            let global_metadata = it.global_metadata.clone();
2673            let objects = vec![(descriptor, data)];
2674            // Iterator path: the object iterator's `data` is the
2675            // already-decoded payload; the original frame's inline
2676            // hash slot isn't accessible from this layer without
2677            // re-reading from the source and re-scanning.  Callers
2678            // that need per-object hashes should either use
2679            // `tgm_file_decode_message` (which surfaces the hash
2680            // via the file-re-read path), or the buffer-based
2681            // `tgm_decode` if the raw bytes are already in memory.
2682            let caches = build_message_caches(&objects, &[]);
2683            let msg = Box::new(TgmMessage {
2684                global_metadata,
2685                objects,
2686                dtype_strings: caches.dtype_strings,
2687                type_strings: caches.type_strings,
2688                byte_order_strings: caches.byte_order_strings,
2689                filter_strings: caches.filter_strings,
2690                compression_strings: caches.compression_strings,
2691                encoding_strings: caches.encoding_strings,
2692                hash_type_strings: caches.hash_type_strings,
2693                hash_value_strings: caches.hash_value_strings,
2694            });
2695            unsafe {
2696                *out = Box::into_raw(msg);
2697            }
2698            TgmError::Ok
2699        }
2700    }
2701}
2702
2703/// Free an object iterator handle.
2704#[unsafe(no_mangle)]
2705pub extern "C" fn tgm_object_iter_free(iter: *mut TgmObjectIter) {
2706    if !iter.is_null() {
2707        unsafe {
2708            drop(Box::from_raw(iter));
2709        }
2710    }
2711}
2712
2713// ---------------------------------------------------------------------------
2714// Error code to string
2715// ---------------------------------------------------------------------------
2716
2717/// Convert an error code to a human-readable string.
2718/// Returns a static string (always valid, never NULL).
2719///
2720/// Accepts a raw integer and matches by value so that invalid discriminants
2721/// from C callers do not trigger undefined behaviour in Rust.
2722#[unsafe(no_mangle)]
2723pub extern "C" fn tgm_error_string(err: TgmError) -> *const c_char {
2724    // Convert to integer for safe matching — C callers may pass invalid values.
2725    let code = err as i32;
2726    let s: &[u8] = match code {
2727        0 => b"ok\0",
2728        1 => b"framing error\0",
2729        2 => b"metadata error\0",
2730        3 => b"encoding error\0",
2731        4 => b"compression error\0",
2732        5 => b"object error\0",
2733        6 => b"I/O error\0",
2734        7 => b"hash mismatch\0",
2735        8 => b"invalid argument\0",
2736        9 => b"end of iteration\0",
2737        10 => b"remote error\0",
2738        _ => b"unknown error\0",
2739    };
2740    s.as_ptr() as *const c_char
2741}
2742
2743// ---------------------------------------------------------------------------
2744// Hash utilities
2745// ---------------------------------------------------------------------------
2746
2747// ---------------------------------------------------------------------------
// Unit tests for metadata lookup helpers, JSON parsing, validation, and FFI round-trips
2749// ---------------------------------------------------------------------------
2750
2751#[cfg(test)]
2752mod tests {
2753    use super::*;
2754    use std::collections::BTreeMap;
2755
2756    fn make_meta(
2757        base: Vec<BTreeMap<String, ciborium::Value>>,
2758        extra: BTreeMap<String, ciborium::Value>,
2759    ) -> GlobalMetadata {
2760        GlobalMetadata {
2761            base,
2762            extra,
2763            ..Default::default()
2764        }
2765    }
2766
2767    // ── lookup_cbor_value ─────────────────────────────────────────────
2768
2769    #[test]
2770    fn lookup_cbor_empty_key() {
2771        let meta = make_meta(vec![], BTreeMap::new());
2772        assert!(lookup_cbor_value(&meta, "").is_none());
2773    }
2774
2775    #[test]
2776    fn lookup_cbor_dot_only() {
2777        let meta = make_meta(vec![], BTreeMap::new());
2778        assert!(lookup_cbor_value(&meta, ".").is_none());
2779    }
2780
2781    #[test]
2782    fn lookup_cbor_version_returns_none() {
2783        // version is handled by tgm_metadata_version, not lookup_cbor_value
2784        let meta = make_meta(vec![], BTreeMap::new());
2785        assert!(lookup_cbor_value(&meta, "version").is_none());
2786    }
2787
2788    #[test]
2789    fn lookup_cbor_base_match() {
2790        let mut entry = BTreeMap::new();
2791        entry.insert("centre".into(), ciborium::Value::Text("ecmwf".into()));
2792        let meta = make_meta(vec![entry], BTreeMap::new());
2793        let val = lookup_cbor_value(&meta, "centre");
2794        assert!(matches!(val, Some(ciborium::Value::Text(s)) if s == "ecmwf"));
2795    }
2796
2797    #[test]
2798    fn lookup_cbor_extra_fallback() {
2799        // Key not in base → found in extra
2800        let mut extra = BTreeMap::new();
2801        extra.insert("source".into(), ciborium::Value::Text("test".into()));
2802        let meta = make_meta(vec![], extra);
2803        let val = lookup_cbor_value(&meta, "source");
2804        assert!(matches!(val, Some(ciborium::Value::Text(s)) if s == "test"));
2805    }
2806
2807    #[test]
2808    fn lookup_cbor_no_match() {
2809        let meta = make_meta(vec![], BTreeMap::new());
2810        assert!(lookup_cbor_value(&meta, "nonexistent").is_none());
2811    }
2812
2813    #[test]
2814    fn lookup_cbor_reserved_skipped() {
2815        let mut entry = BTreeMap::new();
2816        entry.insert(
2817            "_reserved_".into(),
2818            ciborium::Value::Map(vec![(
2819                ciborium::Value::Text("tensor".into()),
2820                ciborium::Value::Text("internal".into()),
2821            )]),
2822        );
2823        entry.insert("param".into(), ciborium::Value::Text("2t".into()));
2824        let meta = make_meta(vec![entry], BTreeMap::new());
2825        // _reserved_ path should be skipped
2826        assert!(lookup_cbor_value(&meta, "_reserved_.tensor").is_none());
2827        // Regular key should still be found
2828        assert!(lookup_cbor_value(&meta, "param").is_some());
2829    }
2830
2831    #[test]
2832    fn lookup_cbor_extra_prefix() {
2833        let mut extra = BTreeMap::new();
2834        extra.insert("custom".into(), ciborium::Value::Text("val".into()));
2835        let meta = make_meta(vec![], extra);
2836        // _extra_.custom should resolve directly in extra
2837        let val = lookup_cbor_value(&meta, "_extra_.custom");
2838        assert!(matches!(val, Some(ciborium::Value::Text(s)) if s == "val"));
2839    }
2840
2841    #[test]
2842    fn lookup_cbor_extra_alias_prefix() {
2843        let mut extra = BTreeMap::new();
2844        extra.insert("custom".into(), ciborium::Value::Text("val".into()));
2845        let meta = make_meta(vec![], extra);
2846        // extra.custom should also resolve in extra
2847        let val = lookup_cbor_value(&meta, "extra.custom");
2848        assert!(matches!(val, Some(ciborium::Value::Text(s)) if s == "val"));
2849    }
2850
2851    #[test]
2852    fn lookup_cbor_extra_prefix_alone_returns_none() {
2853        let meta = make_meta(vec![], BTreeMap::new());
2854        // Bare "_extra_" without subkey returns None
2855        assert!(lookup_cbor_value(&meta, "_extra_").is_none());
2856        assert!(lookup_cbor_value(&meta, "extra").is_none());
2857    }
2858
2859    #[test]
2860    fn lookup_cbor_base_wins_over_extra() {
2861        let mut entry = BTreeMap::new();
2862        entry.insert("shared".into(), ciborium::Value::Text("from_base".into()));
2863        let mut extra = BTreeMap::new();
2864        extra.insert("shared".into(), ciborium::Value::Text("from_extra".into()));
2865        let meta = make_meta(vec![entry], extra);
2866        let val = lookup_cbor_value(&meta, "shared");
2867        assert!(matches!(val, Some(ciborium::Value::Text(s)) if s == "from_base"));
2868    }
2869
2870    #[test]
2871    fn lookup_cbor_deeply_nested() {
2872        let e_val = ciborium::Value::Map(vec![(
2873            ciborium::Value::Text("e".into()),
2874            ciborium::Value::Text("deep".into()),
2875        )]);
2876        let d_val = ciborium::Value::Map(vec![(ciborium::Value::Text("d".into()), e_val)]);
2877        let c_val = ciborium::Value::Map(vec![(ciborium::Value::Text("c".into()), d_val)]);
2878        let b_val = ciborium::Value::Map(vec![(ciborium::Value::Text("b".into()), c_val)]);
2879        let mut entry = BTreeMap::new();
2880        entry.insert("a".into(), b_val);
2881        let meta = make_meta(vec![entry], BTreeMap::new());
2882        let val = lookup_cbor_value(&meta, "a.b.c.d.e");
2883        assert!(matches!(val, Some(ciborium::Value::Text(s)) if s == "deep"));
2884    }
2885
2886    #[test]
2887    fn lookup_cbor_multi_base_first_match() {
2888        let mut entry0 = BTreeMap::new();
2889        entry0.insert("param".into(), ciborium::Value::Text("2t".into()));
2890        let mut entry1 = BTreeMap::new();
2891        entry1.insert("param".into(), ciborium::Value::Text("msl".into()));
2892        let meta = make_meta(vec![entry0, entry1], BTreeMap::new());
2893        let val = lookup_cbor_value(&meta, "param");
2894        assert!(matches!(val, Some(ciborium::Value::Text(s)) if s == "2t"));
2895    }
2896
2897    // ── resolve_cbor_path ─────────────────────────────────────────────
2898
2899    #[test]
2900    fn resolve_cbor_path_empty_remaining() {
2901        let value = ciborium::Value::Text("hello".into());
2902        assert_eq!(resolve_cbor_path(&value, &[]), Some(&value));
2903    }
2904
2905    #[test]
2906    fn resolve_cbor_path_non_map_with_remaining() {
2907        let value = ciborium::Value::Text("hello".into());
2908        assert!(resolve_cbor_path(&value, &["key"]).is_none());
2909    }
2910
2911    #[test]
2912    fn resolve_cbor_path_map_missing_key() {
2913        let value = ciborium::Value::Map(vec![(
2914            ciborium::Value::Text("a".into()),
2915            ciborium::Value::Text("b".into()),
2916        )]);
2917        assert!(resolve_cbor_path(&value, &["missing"]).is_none());
2918    }
2919
2920    // ── lookup_string_key ──
2921
2922    #[test]
2923    fn lookup_string_key_version() {
2924        let meta = make_meta(vec![], BTreeMap::new());
2925        assert_eq!(lookup_string_key(&meta, "version"), Some("3".into()));
2926    }
2927
2928    #[test]
2929    fn lookup_string_key_empty() {
2930        let meta = make_meta(vec![], BTreeMap::new());
2931        assert!(lookup_string_key(&meta, "").is_none());
2932    }
2933
2934    #[test]
2935    fn lookup_string_key_integer_value() {
2936        let mut entry = BTreeMap::new();
2937        entry.insert("count".into(), ciborium::Value::Integer(42.into()));
2938        let meta = make_meta(vec![entry], BTreeMap::new());
2939        assert_eq!(lookup_string_key(&meta, "count"), Some("42".into()));
2940    }
2941
2942    #[test]
2943    fn lookup_string_key_float_value() {
2944        let mut extra = BTreeMap::new();
2945        extra.insert("temperature".into(), ciborium::Value::Float(98.6));
2946        let meta = make_meta(vec![], extra);
2947        assert_eq!(lookup_string_key(&meta, "temperature"), Some("98.6".into()));
2948    }
2949
2950    #[test]
2951    fn lookup_string_key_bool_value() {
2952        let mut extra = BTreeMap::new();
2953        extra.insert("flag".into(), ciborium::Value::Bool(true));
2954        let meta = make_meta(vec![], extra);
2955        assert_eq!(lookup_string_key(&meta, "flag"), Some("true".into()));
2956    }
2957
2958    #[test]
2959    fn lookup_string_key_null_returns_none() {
2960        let mut extra = BTreeMap::new();
2961        extra.insert("nothing".into(), ciborium::Value::Null);
2962        let meta = make_meta(vec![], extra);
2963        // Null is not a string/int/float/bool, so returns None
2964        assert!(lookup_string_key(&meta, "nothing").is_none());
2965    }
2966
2967    // ── lookup_int_key ──
2968
2969    #[test]
2970    fn lookup_int_key_version() {
2971        let meta = make_meta(vec![], BTreeMap::new());
2972        assert_eq!(lookup_int_key(&meta, "version"), Some(3));
2973    }
2974
2975    #[test]
2976    fn lookup_int_key_non_integer() {
2977        let mut extra = BTreeMap::new();
2978        extra.insert("str".into(), ciborium::Value::Text("not_int".into()));
2979        let meta = make_meta(vec![], extra);
2980        assert!(lookup_int_key(&meta, "str").is_none());
2981    }
2982
2983    // ── lookup_float_key ──
2984
2985    #[test]
2986    fn lookup_float_key_float() {
2987        let mut extra = BTreeMap::new();
2988        extra.insert("val".into(), ciborium::Value::Float(98.6));
2989        let meta = make_meta(vec![], extra);
2990        assert_eq!(lookup_float_key(&meta, "val"), Some(98.6));
2991    }
2992
2993    #[test]
2994    fn lookup_float_key_integer_coercion() {
2995        let mut extra = BTreeMap::new();
2996        extra.insert("count".into(), ciborium::Value::Integer(42.into()));
2997        let meta = make_meta(vec![], extra);
2998        assert_eq!(lookup_float_key(&meta, "count"), Some(42.0));
2999    }
3000
3001    #[test]
3002    fn lookup_float_key_non_numeric() {
3003        let mut extra = BTreeMap::new();
3004        extra.insert("str".into(), ciborium::Value::Text("hello".into()));
3005        let meta = make_meta(vec![], extra);
3006        assert!(lookup_float_key(&meta, "str").is_none());
3007    }
3008
3009    // ── parse_encode_json ──
3010
3011    #[test]
3012    fn parse_encode_json_with_base() {
3013        let json = r#"{"version":3,"base":[{"mars":{"param":"2t"}}],"descriptors":[]}"#;
3014        let (gm, descs) = parse_encode_json(json).unwrap();
3015        assert_eq!(gm.base.len(), 1);
3016        assert!(gm.base[0].contains_key("mars"));
3017        assert!(descs.is_empty());
3018    }
3019
3020    #[test]
3021    fn parse_encode_json_legacy_version_routed_to_extra() {
3022        // A caller-supplied legacy top-level `"version"` lands in
3023        // `_extra_["version"]` on decode — matching the Python /
3024        // TypeScript / Rust-core contract.  See Copilot review on
3025        // PR #80.
3026        let json = r#"{"version":3,"descriptors":[]}"#;
3027        let (gm, _) = parse_encode_json(json).unwrap();
3028        assert_eq!(
3029            gm.extra.get("version"),
3030            Some(&ciborium::Value::Integer(3u64.into())),
3031            "legacy JSON `version` must round-trip via `_extra_`"
3032        );
3033    }
3034
3035    #[test]
3036    fn parse_encode_json_free_form_top_level_routed_to_extra() {
3037        // Parity with the Rust core + Python: unknown JSON top-level
3038        // keys flow into `_extra_`.
3039        let json = r#"{"source":"test","count":42,"descriptors":[]}"#;
3040        let (gm, _) = parse_encode_json(json).unwrap();
3041        assert_eq!(
3042            gm.extra.get("source"),
3043            Some(&ciborium::Value::Text("test".to_string()))
3044        );
3045        assert_eq!(
3046            gm.extra.get("count"),
3047            Some(&ciborium::Value::Integer(42u64.into()))
3048        );
3049    }
3050
3051    #[test]
3052    fn parse_encode_json_explicit_extra_unpacked() {
3053        // An explicit `"_extra_"` section at the top level of the FFI
3054        // JSON must be unpacked into `GlobalMetadata.extra` — matching
3055        // the Python / TypeScript / Rust-core contract.  If `_extra_`
3056        // were treated as just another free-form key, a caller doing
3057        // `{"_extra_": {"foo": "bar"}}` would end up with a nested
3058        // `_extra_._extra_.foo` on the wire — clearly wrong.
3059        let json = r#"{"_extra_":{"foo":"bar","count":7},"descriptors":[]}"#;
3060        let (gm, _) = parse_encode_json(json).unwrap();
3061        assert_eq!(
3062            gm.extra.get("foo"),
3063            Some(&ciborium::Value::Text("bar".to_string())),
3064            "explicit `_extra_.foo` must surface at the top level of `extra`"
3065        );
3066        assert_eq!(
3067            gm.extra.get("count"),
3068            Some(&ciborium::Value::Integer(7u64.into())),
3069            "explicit `_extra_.count` must surface at the top level of `extra`"
3070        );
3071        assert!(
3072            !gm.extra.contains_key("_extra_"),
3073            "there must be no nested `_extra_` key inside `extra`"
3074        );
3075    }
3076
3077    #[test]
3078    fn parse_encode_json_explicit_extra_beats_free_form() {
3079        // `explicit beats implicit`: when both an explicit `_extra_.X`
3080        // and a free-form top-level `X` are supplied, the explicit
3081        // entry wins.  Matches the Rust core + Python / TS behaviour.
3082        let json = r#"{"version":99,"_extra_":{"version":1},"descriptors":[]}"#;
3083        let (gm, _) = parse_encode_json(json).unwrap();
3084        assert_eq!(
3085            gm.extra.get("version"),
3086            Some(&ciborium::Value::Integer(1u64.into())),
3087            "explicit _extra_.version must win over top-level version"
3088        );
3089    }
3090
3091    #[test]
3092    fn parse_encode_json_without_base() {
3093        let json = r#"{"version":3,"descriptors":[]}"#;
3094        let (gm, _) = parse_encode_json(json).unwrap();
3095        assert!(gm.base.is_empty());
3096    }
3097
3098    #[test]
3099    fn parse_encode_json_reserved_in_base_rejected() {
3100        let json = r#"{"version":3,"base":[{"_reserved_":{"tensor":{}}}],"descriptors":[]}"#;
3101        let result = parse_encode_json(json);
3102        assert!(result.is_err());
3103        assert!(result.unwrap_err().contains("_reserved_"));
3104    }
3105
3106    #[test]
3107    fn parse_encode_json_extra_keys() {
3108        let json = r#"{"version":3,"descriptors":[],"source":"test","count":42}"#;
3109        let (gm, _) = parse_encode_json(json).unwrap();
3110        assert!(gm.extra.contains_key("source"));
3111        assert!(gm.extra.contains_key("count"));
3112    }
3113
3114    // ── parse_streaming_metadata_json ──
3115
3116    #[test]
3117    fn parse_streaming_json_with_base() {
3118        let json = r#"{"version":3,"base":[{"mars":{"param":"2t"}}]}"#;
3119        let gm = parse_streaming_metadata_json(json).unwrap();
3120        assert_eq!(gm.base.len(), 1);
3121    }
3122
3123    #[test]
3124    fn parse_streaming_json_reserved_rejected() {
3125        let json = r#"{"version":3,"base":[{"_reserved_":{"tensor":{}}}]}"#;
3126        let result = parse_streaming_metadata_json(json);
3127        assert!(result.is_err());
3128        assert!(result.unwrap_err().contains("_reserved_"));
3129    }
3130
3131    #[test]
3132    fn parse_streaming_json_no_base() {
3133        let json = r#"{"version":3,"source":"stream"}"#;
3134        let gm = parse_streaming_metadata_json(json).unwrap();
3135        assert!(gm.base.is_empty());
3136        assert!(gm.extra.contains_key("source"));
3137    }
3138
3139    #[test]
3140    fn parse_streaming_json_explicit_extra_unpacked() {
3141        // Streaming path must honour the same `_extra_` unpacking as
3142        // `parse_encode_json`.
3143        let json = r#"{"_extra_":{"foo":"bar"}}"#;
3144        let gm = parse_streaming_metadata_json(json).unwrap();
3145        assert_eq!(
3146            gm.extra.get("foo"),
3147            Some(&ciborium::Value::Text("bar".to_string()))
3148        );
3149        assert!(!gm.extra.contains_key("_extra_"));
3150    }
3151
3152    #[test]
3153    fn parse_streaming_json_explicit_extra_beats_free_form() {
3154        // `explicit beats implicit` on the streaming path too.
3155        let json = r#"{"version":99,"_extra_":{"version":1}}"#;
3156        let gm = parse_streaming_metadata_json(json).unwrap();
3157        assert_eq!(
3158            gm.extra.get("version"),
3159            Some(&ciborium::Value::Integer(1u64.into()))
3160        );
3161    }
3162
3163    #[test]
3164    fn parse_streaming_json_invalid_json() {
3165        assert!(parse_streaming_metadata_json("not json").is_err());
3166    }
3167
3168    #[test]
3169    fn parse_encode_json_rejects_both_extra_aliases() {
3170        // `_extra_` and `extra` are aliases for the same concept;
3171        // supplying both is ambiguous and rejected by the helper.
3172        let json = r#"{"_extra_":{"a":1},"extra":{"b":2},"descriptors":[]}"#;
3173        let err = parse_encode_json(json).unwrap_err();
3174        assert!(
3175            err.contains("both '_extra_' and 'extra'"),
3176            "unexpected error: {err}"
3177        );
3178    }
3179
3180    #[test]
3181    fn parse_encode_json_rejects_non_object_extra() {
3182        // `_extra_` must be a JSON object.  A scalar is a caller
3183        // error and surfaces a clear message.
3184        let json = r#"{"_extra_":42,"descriptors":[]}"#;
3185        let err = parse_encode_json(json).unwrap_err();
3186        assert!(
3187            err.contains("'_extra_' must be a JSON object"),
3188            "unexpected error: {err}"
3189        );
3190    }
3191
3192    #[test]
3193    fn parse_encode_json_invalid_json() {
3194        assert!(parse_encode_json("not json").is_err());
3195    }
3196
3197    // ── json_to_cbor ──
3198
3199    #[test]
3200    fn json_to_cbor_null() {
3201        assert_eq!(json_to_cbor(serde_json::Value::Null), ciborium::Value::Null);
3202    }
3203
3204    #[test]
3205    fn json_to_cbor_bool() {
3206        assert_eq!(
3207            json_to_cbor(serde_json::Value::Bool(true)),
3208            ciborium::Value::Bool(true)
3209        );
3210    }
3211
3212    #[test]
3213    fn json_to_cbor_integer() {
3214        let val = serde_json::json!(42);
3215        let cbor = json_to_cbor(val);
3216        assert!(matches!(cbor, ciborium::Value::Integer(_)));
3217    }
3218
3219    #[test]
3220    fn json_to_cbor_float() {
3221        let val = serde_json::json!(98.6);
3222        let cbor = json_to_cbor(val);
3223        assert!(matches!(cbor, ciborium::Value::Float(_)));
3224    }
3225
3226    #[test]
3227    fn json_to_cbor_string() {
3228        let val = serde_json::json!("hello");
3229        let cbor = json_to_cbor(val);
3230        assert!(matches!(cbor, ciborium::Value::Text(s) if s == "hello"));
3231    }
3232
3233    #[test]
3234    fn json_to_cbor_array() {
3235        let val = serde_json::json!([1, 2, 3]);
3236        let cbor = json_to_cbor(val);
3237        assert!(matches!(cbor, ciborium::Value::Array(_)));
3238    }
3239
3240    #[test]
3241    fn json_to_cbor_object() {
3242        let val = serde_json::json!({"key": "value"});
3243        let cbor = json_to_cbor(val);
3244        assert!(matches!(cbor, ciborium::Value::Map(_)));
3245    }
3246
3247    #[test]
3248    fn json_to_cbor_u64_fallback_to_float() {
3249        // A number that is not i64 but is u64 → falls back to float
3250        // (JSON numbers outside i64 range)
3251        let val = serde_json::json!(18446744073709551615u64);
3252        let cbor = json_to_cbor(val);
3253        // This should be either Integer or Float depending on serde_json parsing
3254        assert!(!matches!(cbor, ciborium::Value::Null));
3255    }
3256
3257    // ── resolve helpers ──
3258
3259    #[test]
3260    fn resolve_in_btree_skip_reserved_blocks_reserved() {
3261        let mut map = BTreeMap::new();
3262        map.insert("_reserved_".into(), ciborium::Value::Text("secret".into()));
3263        assert!(resolve_in_btree_skip_reserved(&map, &["_reserved_"]).is_none());
3264    }
3265
3266    #[test]
3267    fn resolve_in_btree_empty_parts() {
3268        let map = BTreeMap::new();
3269        assert!(resolve_in_btree(&map, &[]).is_none());
3270    }
3271
3272    #[test]
3273    fn resolve_in_btree_skip_reserved_empty_parts() {
3274        let map = BTreeMap::new();
3275        assert!(resolve_in_btree_skip_reserved(&map, &[]).is_none());
3276    }
3277
3278    // ── validate FFI ──
3279
3280    #[test]
3281    fn parse_validate_options_default() {
3282        let opts = match super::parse_validate_options(ptr::null(), 0) {
3283            Ok(opts) => opts,
3284            Err((_code, msg)) => panic!("expected default options, got error: {msg}"),
3285        };
3286        assert_eq!(opts.max_level, ValidationLevel::Integrity);
3287        assert!(!opts.check_canonical);
3288        assert!(!opts.checksum_only);
3289    }
3290
3291    #[test]
3292    fn parse_validate_options_quick() {
3293        let level = CString::new("quick").unwrap();
3294        let opts = match super::parse_validate_options(level.as_ptr(), 0) {
3295            Ok(opts) => opts,
3296            Err((_code, msg)) => panic!("expected quick options, got error: {msg}"),
3297        };
3298        assert_eq!(opts.max_level, ValidationLevel::Structure);
3299    }
3300
3301    #[test]
3302    fn parse_validate_options_full_canonical() {
3303        let level = CString::new("full").unwrap();
3304        let opts = match super::parse_validate_options(level.as_ptr(), 1) {
3305            Ok(opts) => opts,
3306            Err((_code, msg)) => panic!("expected full options, got error: {msg}"),
3307        };
3308        assert_eq!(opts.max_level, ValidationLevel::Fidelity);
3309        assert!(opts.check_canonical);
3310    }
3311
3312    #[test]
3313    fn parse_validate_options_unknown_level() {
3314        let level = CString::new("bogus").unwrap();
3315        let result = super::parse_validate_options(level.as_ptr(), 0);
3316        assert!(result.is_err());
3317    }
3318
3319    #[test]
3320    fn parse_validate_options_checksum() {
3321        let level = CString::new("checksum").unwrap();
3322        let opts = match super::parse_validate_options(level.as_ptr(), 0) {
3323            Ok(opts) => opts,
3324            Err((_code, msg)) => panic!("expected checksum options, got error: {msg}"),
3325        };
3326        assert_eq!(opts.max_level, ValidationLevel::Integrity);
3327        assert!(opts.checksum_only);
3328    }
3329
3330    // ── tgm_validate end-to-end ──
3331
3332    fn encode_test_message() -> Vec<u8> {
3333        let meta = GlobalMetadata::default();
3334        let desc = DataObjectDescriptor {
3335            obj_type: "ntensor".to_string(),
3336            ndim: 1,
3337            shape: vec![4],
3338            strides: vec![1],
3339            dtype: tensogram::Dtype::Float32,
3340            byte_order: tensogram::ByteOrder::native(),
3341            encoding: "none".to_string(),
3342            filter: "none".to_string(),
3343            compression: "none".to_string(),
3344            params: BTreeMap::new(),
3345            masks: None,
3346        };
3347        let data: Vec<u8> = [1.0f32, 2.0, 3.0, 4.0]
3348            .iter()
3349            .flat_map(|v| v.to_ne_bytes())
3350            .collect();
3351        tensogram::encode(&meta, &[(&desc, data.as_slice())], &Default::default()).unwrap()
3352    }
3353
3354    #[test]
3355    fn tgm_validate_valid_message() {
3356        let msg = encode_test_message();
3357        let mut out = super::TgmBytes {
3358            data: ptr::null_mut(),
3359            len: 0,
3360        };
3361        let err = super::tgm_validate(msg.as_ptr(), msg.len(), ptr::null(), 0, &mut out);
3362        assert!(matches!(err, super::TgmError::Ok));
3363        assert!(!out.data.is_null());
3364        assert!(out.len > 0);
3365        let json_str =
3366            unsafe { std::str::from_utf8(std::slice::from_raw_parts(out.data, out.len)).unwrap() };
3367        assert!(json_str.contains("\"issues\":[]"));
3368        assert!(json_str.contains("\"object_count\":1"));
3369        super::tgm_bytes_free(out);
3370    }
3371
3372    #[test]
3373    fn tgm_validate_empty_buffer() {
3374        let mut out = super::TgmBytes {
3375            data: ptr::null_mut(),
3376            len: 0,
3377        };
3378        let err = super::tgm_validate(ptr::null(), 0, ptr::null(), 0, &mut out);
3379        assert!(matches!(err, super::TgmError::Ok));
3380        let json_str =
3381            unsafe { std::str::from_utf8(std::slice::from_raw_parts(out.data, out.len)).unwrap() };
3382        assert!(json_str.contains("\"buffer_too_short\""));
3383        super::tgm_bytes_free(out);
3384    }
3385
3386    #[test]
3387    fn tgm_validate_invalid_level() {
3388        let msg = encode_test_message();
3389        let level = CString::new("bogus").unwrap();
3390        let mut out = super::TgmBytes {
3391            data: ptr::null_mut(),
3392            len: 0,
3393        };
3394        let err = super::tgm_validate(msg.as_ptr(), msg.len(), level.as_ptr(), 0, &mut out);
3395        assert!(matches!(err, super::TgmError::InvalidArg));
3396    }
3397
3398    #[test]
3399    fn tgm_validate_null_out() {
3400        let msg = encode_test_message();
3401        let err = super::tgm_validate(msg.as_ptr(), msg.len(), ptr::null(), 0, ptr::null_mut());
3402        assert!(matches!(err, super::TgmError::InvalidArg));
3403    }
3404
3405    #[test]
3406    fn tgm_validate_file_nonexistent() {
3407        let path = CString::new("/nonexistent/path/to/file.tgm").unwrap();
3408        let mut out = super::TgmBytes {
3409            data: ptr::null_mut(),
3410            len: 0,
3411        };
3412        let err = super::tgm_validate_file(path.as_ptr(), ptr::null(), 0, &mut out);
3413        assert!(matches!(err, super::TgmError::Io));
3414    }
3415
3416    #[test]
3417    fn tgm_validate_file_null_out() {
3418        let path = CString::new("/tmp/dummy.tgm").unwrap();
3419        let err = super::tgm_validate_file(path.as_ptr(), ptr::null(), 0, ptr::null_mut());
3420        assert!(matches!(err, super::TgmError::InvalidArg));
3421    }
3422
3423    #[test]
3424    fn tgm_validate_file_invalid_level() {
3425        let path = CString::new("/tmp/dummy.tgm").unwrap();
3426        let level = CString::new("bogus").unwrap();
3427        let mut out = super::TgmBytes {
3428            data: ptr::null_mut(),
3429            len: 0,
3430        };
3431        let err = super::tgm_validate_file(path.as_ptr(), level.as_ptr(), 0, &mut out);
3432        assert!(matches!(err, super::TgmError::InvalidArg));
3433    }
3434
3435    // =====================================================================
3436    // FFI round-trip tests — exercise #[no_mangle] extern "C" functions
3437    // =====================================================================
3438
3439    /// Helper: build a JSON metadata string and raw data for a single float32
3440    /// tensor, encode via `tgm_encode`, and return the encoded bytes.
3441    fn ffi_encode_single_f32_tensor(values: &[f32], extra_json: &str) -> Vec<u8> {
3442        let shape_str = format!("[{}]", values.len());
3443        let json = format!(
3444            r#"{{"version":3,"descriptors":[{{"type":"ntensor","ndim":1,"shape":{shape},"strides":[1],"dtype":"float32","byte_order":"{bo}","encoding":"none","filter":"none","compression":"none"}}]{extra}}}"#,
3445            shape = shape_str,
3446            bo = if cfg!(target_endian = "little") {
3447                "little"
3448            } else {
3449                "big"
3450            },
3451            extra = if extra_json.is_empty() {
3452                String::new()
3453            } else {
3454                format!(",{extra_json}")
3455            },
3456        );
3457
3458        let data: Vec<u8> = values.iter().flat_map(|v| v.to_ne_bytes()).collect();
3459        let c_json = CString::new(json).unwrap();
3460        let data_ptr: *const u8 = data.as_ptr();
3461        let data_len: usize = data.len();
3462
3463        let mut out = super::TgmBytes {
3464            data: ptr::null_mut(),
3465            len: 0,
3466        };
3467
3468        let err = super::tgm_encode(
3469            c_json.as_ptr(),
3470            &data_ptr as *const *const u8,
3471            &data_len as *const usize,
3472            1,
3473            ptr::null(), // no hash
3474            0,           // threads
3475            &mut out,
3476        );
3477        assert!(matches!(err, super::TgmError::Ok), "tgm_encode failed");
3478        assert!(!out.data.is_null());
3479        assert!(out.len > 0);
3480
3481        let encoded = unsafe { slice::from_raw_parts(out.data, out.len) }.to_vec();
3482        super::tgm_bytes_free(out);
3483        encoded
3484    }
3485
3486    /// Helper: encode with hash enabled.
3487    fn ffi_encode_with_hash(values: &[f32]) -> Vec<u8> {
3488        let shape_str = format!("[{}]", values.len());
3489        let json = format!(
3490            r#"{{"version":3,"descriptors":[{{"type":"ntensor","ndim":1,"shape":{shape},"strides":[1],"dtype":"float32","byte_order":"{bo}","encoding":"none","filter":"none","compression":"none"}}]}}"#,
3491            shape = shape_str,
3492            bo = if cfg!(target_endian = "little") {
3493                "little"
3494            } else {
3495                "big"
3496            },
3497        );
3498
3499        let data: Vec<u8> = values.iter().flat_map(|v| v.to_ne_bytes()).collect();
3500        let c_json = CString::new(json).unwrap();
3501        let hash_algo = CString::new("xxh3").unwrap();
3502        let data_ptr: *const u8 = data.as_ptr();
3503        let data_len: usize = data.len();
3504
3505        let mut out = super::TgmBytes {
3506            data: ptr::null_mut(),
3507            len: 0,
3508        };
3509
3510        let err = super::tgm_encode(
3511            c_json.as_ptr(),
3512            &data_ptr as *const *const u8,
3513            &data_len as *const usize,
3514            1,
3515            hash_algo.as_ptr(),
3516            0,
3517            &mut out,
3518        );
3519        assert!(
3520            matches!(err, super::TgmError::Ok),
3521            "tgm_encode with hash failed"
3522        );
3523
3524        let encoded = unsafe { slice::from_raw_parts(out.data, out.len) }.to_vec();
3525        super::tgm_bytes_free(out);
3526        encoded
3527    }
3528
3529    // ── tgm_encode / tgm_decode round-trip ──
3530
3531    #[test]
3532    fn ffi_encode_decode_round_trip() {
3533        let values = [1.0f32, 2.0, 3.0, 4.0];
3534        let encoded = ffi_encode_single_f32_tensor(&values, "");
3535
3536        // Decode
3537        let mut msg: *mut super::TgmMessage = ptr::null_mut();
3538        let err = super::tgm_decode(
3539            encoded.as_ptr(),
3540            encoded.len(),
3541            0, // no native byte order rewrite
3542            0, // threads
3543            0, // verify_hash
3544            &mut msg,
3545        );
3546        assert!(matches!(err, super::TgmError::Ok));
3547        assert!(!msg.is_null());
3548
3549        // Message-level accessors
3550        assert_eq!(super::tgm_message_version(msg), 3);
3551        assert_eq!(super::tgm_message_num_objects(msg), 1);
3552        assert_eq!(super::tgm_message_num_decoded(msg), 1);
3553
3554        // Object-level accessors
3555        assert_eq!(super::tgm_object_ndim(msg, 0), 1);
3556
3557        let shape_ptr = super::tgm_object_shape(msg, 0);
3558        assert!(!shape_ptr.is_null());
3559        assert_eq!(unsafe { *shape_ptr }, 4);
3560
3561        let strides_ptr = super::tgm_object_strides(msg, 0);
3562        assert!(!strides_ptr.is_null());
3563        assert_eq!(unsafe { *strides_ptr }, 1);
3564
3565        // dtype string
3566        let dtype_ptr = super::tgm_object_dtype(msg, 0);
3567        assert!(!dtype_ptr.is_null());
3568        let dtype_str = unsafe { CStr::from_ptr(dtype_ptr) }.to_str().unwrap();
3569        assert_eq!(dtype_str, "float32");
3570
3571        // type string
3572        let type_ptr = super::tgm_object_type(msg, 0);
3573        assert!(!type_ptr.is_null());
3574        let type_str = unsafe { CStr::from_ptr(type_ptr) }.to_str().unwrap();
3575        assert_eq!(type_str, "ntensor");
3576
3577        // byte_order string
3578        let bo_ptr = super::tgm_object_byte_order(msg, 0);
3579        assert!(!bo_ptr.is_null());
3580        let bo_str = unsafe { CStr::from_ptr(bo_ptr) }.to_str().unwrap();
3581        assert!(bo_str == "little" || bo_str == "big");
3582
3583        // filter string
3584        let filter_ptr = super::tgm_object_filter(msg, 0);
3585        assert!(!filter_ptr.is_null());
3586        let filter_str = unsafe { CStr::from_ptr(filter_ptr) }.to_str().unwrap();
3587        assert_eq!(filter_str, "none");
3588
3589        // compression string
3590        let comp_ptr = super::tgm_object_compression(msg, 0);
3591        assert!(!comp_ptr.is_null());
3592        let comp_str = unsafe { CStr::from_ptr(comp_ptr) }.to_str().unwrap();
3593        assert_eq!(comp_str, "none");
3594
3595        // encoding string
3596        let enc_ptr = super::tgm_payload_encoding(msg, 0);
3597        assert!(!enc_ptr.is_null());
3598        let enc_str = unsafe { CStr::from_ptr(enc_ptr) }.to_str().unwrap();
3599        assert_eq!(enc_str, "none");
3600
3601        // decoded data
3602        let mut data_len: usize = 0;
3603        let data_ptr = super::tgm_object_data(msg, 0, &mut data_len);
3604        assert!(!data_ptr.is_null());
3605        assert_eq!(data_len, 16); // 4 × 4 bytes
3606
3607        let decoded_bytes = unsafe { slice::from_raw_parts(data_ptr, data_len) };
3608        let decoded_values: Vec<f32> = decoded_bytes
3609            .chunks_exact(4)
3610            .map(|c| f32::from_ne_bytes(c.try_into().unwrap()))
3611            .collect();
3612        assert_eq!(decoded_values, values);
3613
3614        super::tgm_message_free(msg);
3615    }
3616
3617    #[test]
3618    fn ffi_encode_decode_with_hash() {
3619        let values = [10.0f32, 20.0, 30.0];
3620        let encoded = ffi_encode_with_hash(&values);
3621
3622        let mut msg: *mut super::TgmMessage = ptr::null_mut();
3623        let err = super::tgm_decode(
3624            encoded.as_ptr(),
3625            encoded.len(),
3626            0,
3627            0, // threads
3628            0, // verify_hash
3629            &mut msg,
3630        );
3631        assert!(matches!(err, super::TgmError::Ok));
3632
3633        // Hash should be present
3634        assert_eq!(super::tgm_payload_has_hash(msg, 0), 1);
3635
3636        let ht_ptr = super::tgm_object_hash_type(msg, 0);
3637        assert!(!ht_ptr.is_null());
3638        let ht_str = unsafe { CStr::from_ptr(ht_ptr) }.to_str().unwrap();
3639        assert_eq!(ht_str, "xxh3");
3640
3641        let hv_ptr = super::tgm_object_hash_value(msg, 0);
3642        assert!(!hv_ptr.is_null());
3643        let hv_str = unsafe { CStr::from_ptr(hv_ptr) }.to_str().unwrap();
3644        assert!(!hv_str.is_empty());
3645
3646        super::tgm_message_free(msg);
3647    }
3648
3649    #[test]
3650    fn ffi_encode_decode_no_hash() {
3651        let values = [5.0f32];
3652        let encoded = ffi_encode_single_f32_tensor(&values, "");
3653
3654        let mut msg: *mut super::TgmMessage = ptr::null_mut();
3655        let err = super::tgm_decode(encoded.as_ptr(), encoded.len(), 0, 0, 0, &mut msg);
3656        assert!(matches!(err, super::TgmError::Ok));
3657
3658        assert_eq!(super::tgm_payload_has_hash(msg, 0), 0);
3659        assert!(super::tgm_object_hash_type(msg, 0).is_null());
3660        assert!(super::tgm_object_hash_value(msg, 0).is_null());
3661
3662        super::tgm_message_free(msg);
3663    }
3664
3665    // ── verify_hash on the FFI surface ───────────────────────────────
3666
3667    #[test]
3668    fn ffi_decode_verify_hash_succeeds_on_hashed_message() {
3669        // Cell B: hashed message + verify_hash=1 → Ok.
3670        let encoded = ffi_encode_with_hash(&[1.0f32, 2.0]);
3671        let mut msg: *mut super::TgmMessage = ptr::null_mut();
3672        let err = super::tgm_decode(
3673            encoded.as_ptr(),
3674            encoded.len(),
3675            0,
3676            0,
3677            1, // verify_hash
3678            &mut msg,
3679        );
3680        assert!(matches!(err, super::TgmError::Ok));
3681        assert!(!msg.is_null());
3682        super::tgm_message_free(msg);
3683    }
3684
3685    #[test]
3686    fn ffi_decode_verify_hash_returns_missing_hash_on_unhashed_message() {
3687        // Cell C: unhashed message + verify_hash=1 → MissingHash.
3688        let encoded = ffi_encode_single_f32_tensor(&[5.0f32], "");
3689        let mut msg: *mut super::TgmMessage = ptr::null_mut();
3690        let err = super::tgm_decode(
3691            encoded.as_ptr(),
3692            encoded.len(),
3693            0,
3694            0,
3695            1, // verify_hash
3696            &mut msg,
3697        );
3698        assert!(
3699            matches!(err, super::TgmError::MissingHash),
3700            "expected MissingHash, got error code {}",
3701            err as i32
3702        );
3703        let last = unsafe { CStr::from_ptr(super::tgm_last_error()) }
3704            .to_str()
3705            .unwrap();
3706        assert!(
3707            last.contains("object 0"),
3708            "last error should name the offending object: {last}"
3709        );
3710        // No message handle was returned — nothing to free.
3711    }
3712
3713    #[test]
3714    fn ffi_decode_verify_hash_returns_hash_mismatch_on_tampered_slot() {
3715        // Cell D: hashed message with a flipped inline-hash-slot
3716        // byte + verify_hash=1 → HashMismatch.  Tampering the
3717        // slot (rather than the body) keeps the rest of the frame
3718        // structurally valid so the CBOR descriptor parses
3719        // cleanly — only the inline-hash check fires.  See
3720        // `decode_verify_hash.rs` (Rust core) for the cell E
3721        // variant where the payload itself is tampered.
3722        let mut encoded = ffi_encode_with_hash(&[10.0f32, 20.0, 30.0]);
3723        // Locate the message footer's preceding object frame and
3724        // flip a byte of the 8-byte hash slot, which lives at
3725        // `frame_end - 12` for every frame.  Walking from the
3726        // preamble end (24) we find the first NTensorFrame.
3727        let frame_start = {
3728            let mut pos = 24usize;
3729            loop {
3730                assert!(pos + 16 <= encoded.len(), "frame not found");
3731                if &encoded[pos..pos + 2] == b"FR"
3732                    && tensogram::wire::FrameHeader::read_from(&encoded[pos..])
3733                        .map(|fh| fh.frame_type.is_data_object())
3734                        .unwrap_or(false)
3735                {
3736                    break pos;
3737                }
3738                pos += 1;
3739            }
3740        };
3741        let fh = tensogram::wire::FrameHeader::read_from(&encoded[frame_start..]).unwrap();
3742        let frame_end = frame_start + fh.total_length as usize;
3743        let slot_byte = frame_end - 12; // first byte of the 8-byte slot
3744        encoded[slot_byte] ^= 0xFF;
3745
3746        let mut msg: *mut super::TgmMessage = ptr::null_mut();
3747        let err = super::tgm_decode(
3748            encoded.as_ptr(),
3749            encoded.len(),
3750            0,
3751            0,
3752            1, // verify_hash
3753            &mut msg,
3754        );
3755        assert!(
3756            matches!(err, super::TgmError::HashMismatch),
3757            "expected HashMismatch, got error code {}",
3758            err as i32
3759        );
3760        let last = unsafe { CStr::from_ptr(super::tgm_last_error()) }
3761            .to_str()
3762            .unwrap();
3763        assert!(
3764            last.contains("object 0"),
3765            "last error should name the offending object: {last}"
3766        );
3767    }
3768
3769    #[test]
3770    fn ffi_decode_verify_hash_off_silently_decodes_unhashed_message() {
3771        // Cell A complement — no verify, unhashed message decodes
3772        // cleanly (the existing `ffi_encode_decode_no_hash` test
3773        // covers this implicitly; here we add an explicit
3774        // verify_hash=0 assertion to pin the default).
3775        let encoded = ffi_encode_single_f32_tensor(&[5.0f32], "");
3776        let mut msg: *mut super::TgmMessage = ptr::null_mut();
3777        let err = super::tgm_decode(
3778            encoded.as_ptr(),
3779            encoded.len(),
3780            0,
3781            0,
3782            0, // verify_hash off
3783            &mut msg,
3784        );
3785        assert!(matches!(err, super::TgmError::Ok));
3786        super::tgm_message_free(msg);
3787    }
3788
3789    #[test]
3790    fn ffi_decode_object_verify_hash_returns_missing_hash_on_unhashed() {
3791        // Cell C for tgm_decode_object.
3792        let encoded = ffi_encode_single_f32_tensor(&[5.0f32], "");
3793        let mut msg: *mut super::TgmMessage = ptr::null_mut();
3794        let err = super::tgm_decode_object(
3795            encoded.as_ptr(),
3796            encoded.len(),
3797            0,
3798            0,
3799            0,
3800            1, // verify_hash
3801            &mut msg,
3802        );
3803        assert!(
3804            matches!(err, super::TgmError::MissingHash),
3805            "expected MissingHash, got error code {}",
3806            err as i32
3807        );
3808    }
3809
3810    #[test]
3811    fn ffi_encode_with_extra_metadata() {
3812        let values = [1.0f32, 2.0];
3813        let encoded = ffi_encode_single_f32_tensor(&values, r#""source":"test_source","count":42"#);
3814
3815        let mut msg: *mut super::TgmMessage = ptr::null_mut();
3816        let err = super::tgm_decode(encoded.as_ptr(), encoded.len(), 0, 0, 0, &mut msg);
3817        assert!(matches!(err, super::TgmError::Ok));
3818
3819        // Extract metadata from decoded message
3820        let mut meta: *mut super::TgmMetadata = ptr::null_mut();
3821        let err = super::tgm_message_metadata(msg, &mut meta);
3822        assert!(matches!(err, super::TgmError::Ok));
3823
3824        let key = CString::new("source").unwrap();
3825        let val_ptr = super::tgm_metadata_get_string(meta, key.as_ptr());
3826        assert!(!val_ptr.is_null());
3827        let val_str = unsafe { CStr::from_ptr(val_ptr) }.to_str().unwrap();
3828        assert_eq!(val_str, "test_source");
3829
3830        let key_count = CString::new("count").unwrap();
3831        let val_int = super::tgm_metadata_get_int(meta, key_count.as_ptr(), -1);
3832        assert_eq!(val_int, 42);
3833
3834        super::tgm_metadata_free(meta);
3835        super::tgm_message_free(msg);
3836    }
3837
3838    // ── tgm_encode null/error paths ──
3839
3840    #[test]
3841    fn ffi_encode_null_json() {
3842        let mut out = super::TgmBytes {
3843            data: ptr::null_mut(),
3844            len: 0,
3845        };
3846        let err = super::tgm_encode(
3847            ptr::null(),
3848            ptr::null(),
3849            ptr::null(),
3850            0,
3851            ptr::null(),
3852            0,
3853            &mut out,
3854        );
3855        assert!(matches!(err, super::TgmError::InvalidArg));
3856    }
3857
3858    #[test]
3859    fn ffi_encode_null_out() {
3860        let json = CString::new(r#"{"version":3,"descriptors":[]}"#).unwrap();
3861        let err = super::tgm_encode(
3862            json.as_ptr(),
3863            ptr::null(),
3864            ptr::null(),
3865            0,
3866            ptr::null(),
3867            0,
3868            ptr::null_mut(),
3869        );
3870        assert!(matches!(err, super::TgmError::InvalidArg));
3871    }
3872
3873    #[test]
3874    fn ffi_encode_descriptor_count_mismatch() {
3875        // JSON says 0 descriptors, but num_objects = 1
3876        let json = CString::new(r#"{"version":3,"descriptors":[]}"#).unwrap();
3877        let data: [u8; 4] = [0; 4];
3878        let data_ptr: *const u8 = data.as_ptr();
3879        let data_len: usize = 4;
3880        let mut out = super::TgmBytes {
3881            data: ptr::null_mut(),
3882            len: 0,
3883        };
3884        let err = super::tgm_encode(
3885            json.as_ptr(),
3886            &data_ptr as *const *const u8,
3887            &data_len as *const usize,
3888            1, // mismatch!
3889            ptr::null(),
3890            0, // threads
3891            &mut out,
3892        );
3893        assert!(matches!(err, super::TgmError::InvalidArg));
3894    }
3895
3896    #[test]
3897    fn ffi_encode_invalid_json() {
3898        let json = CString::new("not valid json").unwrap();
3899        let mut out = super::TgmBytes {
3900            data: ptr::null_mut(),
3901            len: 0,
3902        };
3903        let err = super::tgm_encode(
3904            json.as_ptr(),
3905            ptr::null(),
3906            ptr::null(),
3907            0,
3908            ptr::null(),
3909            0,
3910            &mut out,
3911        );
3912        assert!(matches!(err, super::TgmError::Metadata));
3913    }
3914
3915    // ── tgm_decode null/error paths ──
3916
3917    #[test]
3918    fn ffi_decode_null_buf() {
3919        let mut msg: *mut super::TgmMessage = ptr::null_mut();
3920        let err = super::tgm_decode(ptr::null(), 0, 0, 0, 0, &mut msg);
3921        assert!(matches!(err, super::TgmError::InvalidArg));
3922    }
3923
3924    #[test]
3925    fn ffi_decode_null_out() {
3926        let data = [0u8; 10];
3927        let err = super::tgm_decode(data.as_ptr(), data.len(), 0, 0, 0, ptr::null_mut());
3928        assert!(matches!(err, super::TgmError::InvalidArg));
3929    }
3930
3931    #[test]
3932    fn ffi_decode_garbage_data() {
3933        let data = [0u8; 10];
3934        let mut msg: *mut super::TgmMessage = ptr::null_mut();
3935        let err = super::tgm_decode(data.as_ptr(), data.len(), 0, 0, 0, &mut msg);
3936        // Should fail with a framing or other error
3937        assert!(!matches!(err, super::TgmError::Ok));
3938    }
3939
3940    // ── tgm_decode_metadata round-trip ──
3941
3942    #[test]
3943    fn ffi_decode_metadata_round_trip() {
3944        let values = [1.0f32, 2.0];
3945        let encoded = ffi_encode_single_f32_tensor(&values, r#""source":"meta_test""#);
3946
3947        let mut meta: *mut super::TgmMetadata = ptr::null_mut();
3948        let err = super::tgm_decode_metadata(encoded.as_ptr(), encoded.len(), &mut meta);
3949        assert!(matches!(err, super::TgmError::Ok));
3950        assert!(!meta.is_null());
3951
3952        // Version
3953        assert_eq!(super::tgm_metadata_version(meta), 3);
3954
3955        // num_objects
3956        assert_eq!(super::tgm_metadata_num_objects(meta), 1);
3957
3958        // String lookup
3959        let key = CString::new("source").unwrap();
3960        let val_ptr = super::tgm_metadata_get_string(meta, key.as_ptr());
3961        assert!(!val_ptr.is_null());
3962        let val_str = unsafe { CStr::from_ptr(val_ptr) }.to_str().unwrap();
3963        assert_eq!(val_str, "meta_test");
3964
3965        // Missing key returns null
3966        let bad_key = CString::new("nonexistent").unwrap();
3967        assert!(super::tgm_metadata_get_string(meta, bad_key.as_ptr()).is_null());
3968
3969        // Int with default
3970        let bad_key2 = CString::new("missing_int").unwrap();
3971        assert_eq!(
3972            super::tgm_metadata_get_int(meta, bad_key2.as_ptr(), -999),
3973            -999
3974        );
3975
3976        // Float with default
3977        let bad_key3 = CString::new("missing_float").unwrap();
3978        let fval = super::tgm_metadata_get_float(meta, bad_key3.as_ptr(), 3.25);
3979        assert!((fval - 3.25).abs() < f64::EPSILON);
3980
3981        super::tgm_metadata_free(meta);
3982    }
3983
3984    #[test]
3985    fn ffi_decode_metadata_null_args() {
3986        let mut meta: *mut super::TgmMetadata = ptr::null_mut();
3987        let err = super::tgm_decode_metadata(ptr::null(), 0, &mut meta);
3988        assert!(matches!(err, super::TgmError::InvalidArg));
3989
3990        let data = [0u8; 10];
3991        let err = super::tgm_decode_metadata(data.as_ptr(), data.len(), ptr::null_mut());
3992        assert!(matches!(err, super::TgmError::InvalidArg));
3993    }
3994
3995    // ── tgm_metadata null pointer safety ──
3996
3997    #[test]
3998    fn ffi_metadata_accessors_null_handle() {
3999        assert_eq!(super::tgm_metadata_version(ptr::null()), 0);
4000        assert_eq!(super::tgm_metadata_num_objects(ptr::null()), 0);
4001        assert!(super::tgm_metadata_get_string(ptr::null(), ptr::null()).is_null());
4002        assert_eq!(
4003            super::tgm_metadata_get_int(ptr::null(), ptr::null(), -1),
4004            -1
4005        );
4006        assert_eq!(
4007            super::tgm_metadata_get_float(ptr::null(), ptr::null(), 1.5),
4008            1.5
4009        );
4010    }
4011
4012    #[test]
4013    fn ffi_metadata_get_string_null_key() {
4014        let encoded = ffi_encode_single_f32_tensor(&[1.0f32], "");
4015        let mut meta: *mut super::TgmMetadata = ptr::null_mut();
4016        let err = super::tgm_decode_metadata(encoded.as_ptr(), encoded.len(), &mut meta);
4017        assert!(matches!(err, super::TgmError::Ok));
4018
4019        assert!(super::tgm_metadata_get_string(meta, ptr::null()).is_null());
4020        assert_eq!(super::tgm_metadata_get_int(meta, ptr::null(), -1), -1);
4021        assert_eq!(super::tgm_metadata_get_float(meta, ptr::null(), 1.5), 1.5);
4022
4023        super::tgm_metadata_free(meta);
4024    }
4025
4026    #[test]
4027    fn ffi_metadata_get_version_via_string() {
4028        let encoded = ffi_encode_single_f32_tensor(&[1.0f32], "");
4029        let mut meta: *mut super::TgmMetadata = ptr::null_mut();
4030        let err = super::tgm_decode_metadata(encoded.as_ptr(), encoded.len(), &mut meta);
4031        assert!(matches!(err, super::TgmError::Ok));
4032
4033        let key = CString::new("version").unwrap();
4034        let val_ptr = super::tgm_metadata_get_string(meta, key.as_ptr());
4035        assert!(!val_ptr.is_null());
4036        let val_str = unsafe { CStr::from_ptr(val_ptr) }.to_str().unwrap();
4037        assert_eq!(val_str, "3");
4038
4039        let ival = super::tgm_metadata_get_int(meta, key.as_ptr(), -1);
4040        assert_eq!(ival, 3);
4041
4042        super::tgm_metadata_free(meta);
4043    }
4044
4045    #[test]
4046    fn ffi_metadata_get_float_value() {
4047        let values = [1.0f32];
4048        let encoded = ffi_encode_single_f32_tensor(&values, r#""temperature":98.6"#);
4049
4050        let mut meta: *mut super::TgmMetadata = ptr::null_mut();
4051        let err = super::tgm_decode_metadata(encoded.as_ptr(), encoded.len(), &mut meta);
4052        assert!(matches!(err, super::TgmError::Ok));
4053
4054        let key = CString::new("temperature").unwrap();
4055        let fval = super::tgm_metadata_get_float(meta, key.as_ptr(), 0.0);
4056        assert!((fval - 98.6).abs() < 0.01);
4057
4058        super::tgm_metadata_free(meta);
4059    }
4060
4061    // ── tgm_decode_object ──
4062
4063    #[test]
4064    fn ffi_decode_object_round_trip() {
4065        let values = [10.0f32, 20.0, 30.0, 40.0];
4066        let encoded = ffi_encode_single_f32_tensor(&values, "");
4067
4068        let mut msg: *mut super::TgmMessage = ptr::null_mut();
4069        let err = super::tgm_decode_object(
4070            encoded.as_ptr(),
4071            encoded.len(),
4072            0, // index
4073            0, // native byte order
4074            0, // threads
4075            0, // verify_hash
4076            &mut msg,
4077        );
4078        assert!(matches!(err, super::TgmError::Ok));
4079        assert!(!msg.is_null());
4080
4081        // Single object in result
4082        assert_eq!(super::tgm_message_num_objects(msg), 1);
4083        assert_eq!(super::tgm_object_ndim(msg, 0), 1);
4084
4085        let mut data_len: usize = 0;
4086        let data_ptr = super::tgm_object_data(msg, 0, &mut data_len);
4087        assert!(!data_ptr.is_null());
4088        let decoded_bytes = unsafe { slice::from_raw_parts(data_ptr, data_len) };
4089        let decoded_values: Vec<f32> = decoded_bytes
4090            .chunks_exact(4)
4091            .map(|c| f32::from_ne_bytes(c.try_into().unwrap()))
4092            .collect();
4093        assert_eq!(decoded_values, values);
4094
4095        super::tgm_message_free(msg);
4096    }
4097
4098    #[test]
4099    fn ffi_decode_object_out_of_range() {
4100        let encoded = ffi_encode_single_f32_tensor(&[1.0f32], "");
4101        let mut msg: *mut super::TgmMessage = ptr::null_mut();
4102        let err = super::tgm_decode_object(
4103            encoded.as_ptr(),
4104            encoded.len(),
4105            999, // out of range
4106            0,
4107            0, // threads
4108            0, // verify_hash
4109            &mut msg,
4110        );
4111        assert!(!matches!(err, super::TgmError::Ok));
4112    }
4113
4114    #[test]
4115    fn ffi_decode_object_null_args() {
4116        let mut msg: *mut super::TgmMessage = ptr::null_mut();
4117        let err = super::tgm_decode_object(ptr::null(), 0, 0, 0, 0, 0, &mut msg);
4118        assert!(matches!(err, super::TgmError::InvalidArg));
4119
4120        let data = [0u8; 10];
4121        let err = super::tgm_decode_object(data.as_ptr(), data.len(), 0, 0, 0, 0, ptr::null_mut());
4122        assert!(matches!(err, super::TgmError::InvalidArg));
4123    }
4124
4125    // ── tgm_message_metadata ──
4126
4127    #[test]
4128    fn ffi_message_metadata_null_args() {
4129        let err = super::tgm_message_metadata(ptr::null(), ptr::null_mut());
4130        assert!(matches!(err, super::TgmError::InvalidArg));
4131
4132        let encoded = ffi_encode_single_f32_tensor(&[1.0f32], "");
4133        let mut msg: *mut super::TgmMessage = ptr::null_mut();
4134        let err = super::tgm_decode(encoded.as_ptr(), encoded.len(), 0, 0, 0, &mut msg);
4135        assert!(matches!(err, super::TgmError::Ok));
4136
4137        let err = super::tgm_message_metadata(msg, ptr::null_mut());
4138        assert!(matches!(err, super::TgmError::InvalidArg));
4139
4140        super::tgm_message_free(msg);
4141    }
4142
4143    // ── tgm_message accessors with null msg ──
4144
4145    #[test]
4146    fn ffi_message_accessors_null_handle() {
4147        assert_eq!(super::tgm_message_version(ptr::null()), 0);
4148        assert_eq!(super::tgm_message_num_objects(ptr::null()), 0);
4149        assert_eq!(super::tgm_message_num_decoded(ptr::null()), 0);
4150        assert_eq!(super::tgm_object_ndim(ptr::null(), 0), 0);
4151        assert!(super::tgm_object_shape(ptr::null(), 0).is_null());
4152        assert!(super::tgm_object_strides(ptr::null(), 0).is_null());
4153        assert!(super::tgm_object_dtype(ptr::null(), 0).is_null());
4154        assert!(super::tgm_object_type(ptr::null(), 0).is_null());
4155        assert!(super::tgm_object_byte_order(ptr::null(), 0).is_null());
4156        assert!(super::tgm_object_filter(ptr::null(), 0).is_null());
4157        assert!(super::tgm_object_compression(ptr::null(), 0).is_null());
4158        assert!(super::tgm_payload_encoding(ptr::null(), 0).is_null());
4159        assert_eq!(super::tgm_payload_has_hash(ptr::null(), 0), 0);
4160        assert!(super::tgm_object_hash_type(ptr::null(), 0).is_null());
4161        assert!(super::tgm_object_hash_value(ptr::null(), 0).is_null());
4162
4163        let mut data_len: usize = 99;
4164        let data_ptr = super::tgm_object_data(ptr::null(), 0, &mut data_len);
4165        assert!(data_ptr.is_null());
4166        assert_eq!(data_len, 0);
4167    }
4168
4169    // ── tgm_message accessors out-of-bounds index ──
4170
4171    #[test]
4172    fn ffi_message_accessors_out_of_bounds() {
4173        let encoded = ffi_encode_single_f32_tensor(&[1.0f32], "");
4174        let mut msg: *mut super::TgmMessage = ptr::null_mut();
4175        let err = super::tgm_decode(encoded.as_ptr(), encoded.len(), 0, 0, 0, &mut msg);
4176        assert!(matches!(err, super::TgmError::Ok));
4177
4178        // Index 1 does not exist (only index 0)
4179        assert_eq!(super::tgm_object_ndim(msg, 1), 0);
4180        assert!(super::tgm_object_shape(msg, 1).is_null());
4181        assert!(super::tgm_object_strides(msg, 1).is_null());
4182        assert!(super::tgm_object_dtype(msg, 1).is_null());
4183        assert!(super::tgm_object_type(msg, 1).is_null());
4184        assert!(super::tgm_object_byte_order(msg, 1).is_null());
4185        assert!(super::tgm_object_filter(msg, 1).is_null());
4186        assert!(super::tgm_object_compression(msg, 1).is_null());
4187        assert!(super::tgm_payload_encoding(msg, 1).is_null());
4188        assert_eq!(super::tgm_payload_has_hash(msg, 1), 0);
4189        assert!(super::tgm_object_hash_type(msg, 1).is_null());
4190        assert!(super::tgm_object_hash_value(msg, 1).is_null());
4191
4192        let mut data_len: usize = 99;
4193        let data_ptr = super::tgm_object_data(msg, 1, &mut data_len);
4194        assert!(data_ptr.is_null());
4195        assert_eq!(data_len, 0);
4196
4197        super::tgm_message_free(msg);
4198    }
4199
4200    // ── tgm_object_data with null out_len ──
4201
4202    #[test]
4203    fn ffi_object_data_null_out_len() {
4204        let encoded = ffi_encode_single_f32_tensor(&[1.0f32], "");
4205        let mut msg: *mut super::TgmMessage = ptr::null_mut();
4206        let err = super::tgm_decode(encoded.as_ptr(), encoded.len(), 0, 0, 0, &mut msg);
4207        assert!(matches!(err, super::TgmError::Ok));
4208
4209        // null out_len should not crash
4210        let data_ptr = super::tgm_object_data(msg, 0, ptr::null_mut());
4211        assert!(!data_ptr.is_null());
4212
4213        super::tgm_message_free(msg);
4214    }
4215
4216    // ── tgm_decode_range ──
4217
4218    #[test]
4219    fn ffi_decode_range_round_trip() {
4220        let values = [1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
4221        let encoded = ffi_encode_single_f32_tensor(&values, "");
4222
4223        // Request elements [2..5) (3 elements)
4224        let range_offset: u64 = 2;
4225        let range_count: u64 = 3;
4226        let mut out_buf = super::TgmBytes {
4227            data: ptr::null_mut(),
4228            len: 0,
4229        };
4230        let mut out_count: usize = 0;
4231
4232        let err = super::tgm_decode_range(
4233            encoded.as_ptr(),
4234            encoded.len(),
4235            0,
4236            &range_offset as *const u64,
4237            &range_count as *const u64,
4238            1,
4239            0, // no native byte order
4240            0, // threads
4241            1, // join
4242            &mut out_buf,
4243            &mut out_count,
4244        );
4245        assert!(matches!(err, super::TgmError::Ok));
4246        assert_eq!(out_count, 1);
4247        assert!(!out_buf.data.is_null());
4248
4249        let decoded_bytes = unsafe { slice::from_raw_parts(out_buf.data, out_buf.len) };
4250        let decoded_values: Vec<f32> = decoded_bytes
4251            .chunks_exact(4)
4252            .map(|c| f32::from_ne_bytes(c.try_into().unwrap()))
4253            .collect();
4254        assert_eq!(decoded_values, [3.0, 4.0, 5.0]);
4255
4256        super::tgm_bytes_free(out_buf);
4257    }
4258
4259    #[test]
4260    fn ffi_decode_range_split_mode() {
4261        let values = [10.0f32, 20.0, 30.0, 40.0];
4262        let encoded = ffi_encode_single_f32_tensor(&values, "");
4263
4264        // Two ranges: [0..2), [2..4)
4265        let range_offsets: [u64; 2] = [0, 2];
4266        let range_counts: [u64; 2] = [2, 2];
4267        let mut out_bufs = [
4268            super::TgmBytes {
4269                data: ptr::null_mut(),
4270                len: 0,
4271            },
4272            super::TgmBytes {
4273                data: ptr::null_mut(),
4274                len: 0,
4275            },
4276        ];
4277        let mut out_count: usize = 0;
4278
4279        let err = super::tgm_decode_range(
4280            encoded.as_ptr(),
4281            encoded.len(),
4282            0,
4283            range_offsets.as_ptr(),
4284            range_counts.as_ptr(),
4285            2,
4286            0,
4287            0, // threads
4288            0, // split mode (join=0)
4289            out_bufs.as_mut_ptr(),
4290            &mut out_count,
4291        );
4292        assert!(matches!(err, super::TgmError::Ok));
4293        assert_eq!(out_count, 2);
4294
4295        // First range: [10.0, 20.0]
4296        let bytes0 = unsafe { slice::from_raw_parts(out_bufs[0].data, out_bufs[0].len) };
4297        let vals0: Vec<f32> = bytes0
4298            .chunks_exact(4)
4299            .map(|c| f32::from_ne_bytes(c.try_into().unwrap()))
4300            .collect();
4301        assert_eq!(vals0, [10.0, 20.0]);
4302
4303        // Second range: [30.0, 40.0]
4304        let bytes1 = unsafe { slice::from_raw_parts(out_bufs[1].data, out_bufs[1].len) };
4305        let vals1: Vec<f32> = bytes1
4306            .chunks_exact(4)
4307            .map(|c| f32::from_ne_bytes(c.try_into().unwrap()))
4308            .collect();
4309        assert_eq!(vals1, [30.0, 40.0]);
4310
4311        // TgmBytes is not Copy, so manually construct values for free
4312        super::tgm_bytes_free(super::TgmBytes {
4313            data: out_bufs[0].data,
4314            len: out_bufs[0].len,
4315        });
4316        super::tgm_bytes_free(super::TgmBytes {
4317            data: out_bufs[1].data,
4318            len: out_bufs[1].len,
4319        });
4320    }
4321
4322    #[test]
4323    fn ffi_decode_range_null_args() {
4324        let mut out_buf = super::TgmBytes {
4325            data: ptr::null_mut(),
4326            len: 0,
4327        };
4328        let mut out_count: usize = 0;
4329
4330        // null buf
4331        let err = super::tgm_decode_range(
4332            ptr::null(),
4333            0,
4334            0,
4335            ptr::null(),
4336            ptr::null(),
4337            0,
4338            0,
4339            0,
4340            0,
4341            &mut out_buf,
4342            &mut out_count,
4343        );
4344        assert!(matches!(err, super::TgmError::InvalidArg));
4345
4346        // null out
4347        let data = [0u8; 10];
4348        let err = super::tgm_decode_range(
4349            data.as_ptr(),
4350            data.len(),
4351            0,
4352            ptr::null(),
4353            ptr::null(),
4354            0,
4355            0,
4356            0,
4357            0,
4358            ptr::null_mut(),
4359            &mut out_count,
4360        );
4361        assert!(matches!(err, super::TgmError::InvalidArg));
4362
4363        // null out_count
4364        let err = super::tgm_decode_range(
4365            data.as_ptr(),
4366            data.len(),
4367            0,
4368            ptr::null(),
4369            ptr::null(),
4370            0,
4371            0,
4372            0,
4373            0,
4374            &mut out_buf,
4375            ptr::null_mut(),
4376        );
4377        assert!(matches!(err, super::TgmError::InvalidArg));
4378    }
4379
4380    #[test]
4381    fn ffi_decode_range_null_ranges_with_nonzero_count() {
4382        let encoded = ffi_encode_single_f32_tensor(&[1.0f32], "");
4383        let mut out_buf = super::TgmBytes {
4384            data: ptr::null_mut(),
4385            len: 0,
4386        };
4387        let mut out_count: usize = 0;
4388
4389        let err = super::tgm_decode_range(
4390            encoded.as_ptr(),
4391            encoded.len(),
4392            0,
4393            ptr::null(), // null ranges_offsets
4394            ptr::null(), // null ranges_counts
4395            1,           // but num_ranges > 0
4396            0,
4397            0, // threads
4398            0,
4399            &mut out_buf,
4400            &mut out_count,
4401        );
4402        assert!(matches!(err, super::TgmError::InvalidArg));
4403    }
4404
4405    // ── tgm_scan ──
4406
4407    #[test]
4408    fn ffi_scan_single_message() {
4409        let encoded = ffi_encode_single_f32_tensor(&[1.0f32, 2.0], "");
4410
4411        let mut result: *mut super::TgmScanResult = ptr::null_mut();
4412        let err = super::tgm_scan(encoded.as_ptr(), encoded.len(), &mut result);
4413        assert!(matches!(err, super::TgmError::Ok));
4414        assert!(!result.is_null());
4415
4416        assert_eq!(super::tgm_scan_count(result), 1);
4417
4418        let entry = super::tgm_scan_entry(result, 0);
4419        assert_eq!(entry.offset, 0);
4420        assert_eq!(entry.length, encoded.len());
4421
4422        // Out of bounds entry returns sentinel (offset=usize::MAX, length=0)
4423        // and sets tgm_last_error
4424        let bad = super::tgm_scan_entry(result, 999);
4425        assert_eq!(bad.offset, usize::MAX);
4426        assert_eq!(bad.length, 0);
4427        let err_ptr = super::tgm_last_error();
4428        assert!(!err_ptr.is_null());
4429        let err_str = unsafe { CStr::from_ptr(err_ptr) }.to_str().unwrap();
4430        assert!(
4431            err_str.contains("out of range"),
4432            "expected OOB error, got: {err_str}"
4433        );
4434
4435        super::tgm_scan_free(result);
4436    }
4437
4438    #[test]
4439    fn ffi_scan_null_args() {
4440        let mut result: *mut super::TgmScanResult = ptr::null_mut();
4441        let err = super::tgm_scan(ptr::null(), 0, &mut result);
4442        assert!(matches!(err, super::TgmError::InvalidArg));
4443
4444        let data = [0u8; 10];
4445        let err = super::tgm_scan(data.as_ptr(), data.len(), ptr::null_mut());
4446        assert!(matches!(err, super::TgmError::InvalidArg));
4447    }
4448
4449    #[test]
4450    fn ffi_scan_null_handle_accessors() {
4451        assert_eq!(super::tgm_scan_count(ptr::null()), 0);
4452        let entry = super::tgm_scan_entry(ptr::null(), 0);
4453        assert_eq!(entry.offset, usize::MAX);
4454        assert_eq!(entry.length, 0);
4455    }
4456
4457    #[test]
4458    fn ffi_scan_concatenated_messages() {
4459        let msg1 = ffi_encode_single_f32_tensor(&[1.0f32], "");
4460        let msg2 = ffi_encode_single_f32_tensor(&[2.0f32], "");
4461        let mut concat = msg1.clone();
4462        concat.extend_from_slice(&msg2);
4463
4464        let mut result: *mut super::TgmScanResult = ptr::null_mut();
4465        let err = super::tgm_scan(concat.as_ptr(), concat.len(), &mut result);
4466        assert!(matches!(err, super::TgmError::Ok));
4467
4468        assert_eq!(super::tgm_scan_count(result), 2);
4469
4470        let e0 = super::tgm_scan_entry(result, 0);
4471        assert_eq!(e0.offset, 0);
4472        assert_eq!(e0.length, msg1.len());
4473
4474        let e1 = super::tgm_scan_entry(result, 1);
4475        assert_eq!(e1.offset, msg1.len());
4476        assert_eq!(e1.length, msg2.len());
4477
4478        super::tgm_scan_free(result);
4479    }
4480
4481    // ── tgm_file_* functions ──
4482
4483    #[test]
4484    fn ffi_file_create_append_count_decode_close() {
4485        let dir = std::env::temp_dir();
4486        let path = dir.join("ffi_test_file.tgm");
4487        let _ = std::fs::remove_file(&path);
4488
4489        let c_path = CString::new(path.to_str().unwrap()).unwrap();
4490
4491        // Create
4492        let mut file: *mut super::TgmFile = ptr::null_mut();
4493        let err = super::tgm_file_create(c_path.as_ptr(), &mut file);
4494        assert!(matches!(err, super::TgmError::Ok));
4495        assert!(!file.is_null());
4496
4497        // Append a message
4498        let values = [10.0f32, 20.0, 30.0];
4499        let shape_str = format!("[{}]", values.len());
4500        let json = format!(
4501            r#"{{"version":3,"descriptors":[{{"type":"ntensor","ndim":1,"shape":{shape},"strides":[1],"dtype":"float32","byte_order":"{bo}","encoding":"none","filter":"none","compression":"none"}}]}}"#,
4502            shape = shape_str,
4503            bo = if cfg!(target_endian = "little") {
4504                "little"
4505            } else {
4506                "big"
4507            },
4508        );
4509        let c_json = CString::new(json).unwrap();
4510        let data: Vec<u8> = values.iter().flat_map(|v| v.to_ne_bytes()).collect();
4511        let data_ptr: *const u8 = data.as_ptr();
4512        let data_len: usize = data.len();
4513
4514        let err = super::tgm_file_append(
4515            file,
4516            c_json.as_ptr(),
4517            &data_ptr as *const *const u8,
4518            &data_len as *const usize,
4519            1,
4520            ptr::null(),
4521            0,
4522        );
4523        assert!(matches!(err, super::TgmError::Ok));
4524
4525        // Check path accessor
4526        let path_ptr = super::tgm_file_path(file);
4527        assert!(!path_ptr.is_null());
4528        let path_str = unsafe { CStr::from_ptr(path_ptr) }.to_str().unwrap();
4529        assert!(path_str.contains("ffi_test_file.tgm"));
4530
4531        super::tgm_file_close(file);
4532
4533        // Re-open for reading
4534        let mut file2: *mut super::TgmFile = ptr::null_mut();
4535        let err = super::tgm_file_open(c_path.as_ptr(), &mut file2);
4536        assert!(matches!(err, super::TgmError::Ok));
4537
4538        // Message count
4539        let mut count: usize = 0;
4540        let err = super::tgm_file_message_count(file2, &mut count);
4541        assert!(matches!(err, super::TgmError::Ok));
4542        assert_eq!(count, 1);
4543
4544        // Decode message
4545        let mut msg: *mut super::TgmMessage = ptr::null_mut();
4546        let err = super::tgm_file_decode_message(file2, 0, 0, 0, 0, &mut msg);
4547        assert!(matches!(err, super::TgmError::Ok));
4548
4549        assert_eq!(super::tgm_message_num_objects(msg), 1);
4550        let mut data_len2: usize = 0;
4551        let dp = super::tgm_object_data(msg, 0, &mut data_len2);
4552        assert!(!dp.is_null());
4553        let decoded_bytes = unsafe { slice::from_raw_parts(dp, data_len2) };
4554        let decoded_values: Vec<f32> = decoded_bytes
4555            .chunks_exact(4)
4556            .map(|c| f32::from_ne_bytes(c.try_into().unwrap()))
4557            .collect();
4558        assert_eq!(decoded_values, values);
4559
4560        super::tgm_message_free(msg);
4561
4562        // Read raw message
4563        let mut raw = super::TgmBytes {
4564            data: ptr::null_mut(),
4565            len: 0,
4566        };
4567        let err = super::tgm_file_read_message(file2, 0, &mut raw);
4568        assert!(matches!(err, super::TgmError::Ok));
4569        assert!(!raw.data.is_null());
4570        assert!(raw.len > 0);
4571        super::tgm_bytes_free(raw);
4572
4573        super::tgm_file_close(file2);
4574        let _ = std::fs::remove_file(&path);
4575    }
4576
4577    #[test]
4578    fn ffi_file_open_nonexistent() {
4579        let c_path = CString::new("/nonexistent/file.tgm").unwrap();
4580        let mut file: *mut super::TgmFile = ptr::null_mut();
4581        let err = super::tgm_file_open(c_path.as_ptr(), &mut file);
4582        assert!(!matches!(err, super::TgmError::Ok));
4583    }
4584
4585    #[test]
4586    fn ffi_file_null_args() {
4587        let mut file: *mut super::TgmFile = ptr::null_mut();
4588
4589        // open null path
4590        let err = super::tgm_file_open(ptr::null(), &mut file);
4591        assert!(matches!(err, super::TgmError::InvalidArg));
4592
4593        // open null out
4594        let c_path = CString::new("/tmp/test.tgm").unwrap();
4595        let err = super::tgm_file_open(c_path.as_ptr(), ptr::null_mut());
4596        assert!(matches!(err, super::TgmError::InvalidArg));
4597
4598        // create null path
4599        let err = super::tgm_file_create(ptr::null(), &mut file);
4600        assert!(matches!(err, super::TgmError::InvalidArg));
4601
4602        // create null out
4603        let err = super::tgm_file_create(c_path.as_ptr(), ptr::null_mut());
4604        assert!(matches!(err, super::TgmError::InvalidArg));
4605
4606        // message_count null args
4607        let err = super::tgm_file_message_count(ptr::null_mut(), ptr::null_mut());
4608        assert!(matches!(err, super::TgmError::InvalidArg));
4609
4610        // decode_message null args
4611        let err = super::tgm_file_decode_message(ptr::null_mut(), 0, 0, 0, 0, ptr::null_mut());
4612        assert!(matches!(err, super::TgmError::InvalidArg));
4613
4614        // read_message null args
4615        let err = super::tgm_file_read_message(ptr::null_mut(), 0, ptr::null_mut());
4616        assert!(matches!(err, super::TgmError::InvalidArg));
4617
4618        // append null args
4619        let err = super::tgm_file_append(
4620            ptr::null_mut(),
4621            ptr::null(),
4622            ptr::null(),
4623            ptr::null(),
4624            0,
4625            ptr::null(),
4626            0,
4627        );
4628        assert!(matches!(err, super::TgmError::InvalidArg));
4629
4630        // append_raw null args
4631        let err = super::tgm_file_append_raw(ptr::null_mut(), ptr::null(), 0);
4632        assert!(matches!(err, super::TgmError::InvalidArg));
4633
4634        // path null
4635        assert!(super::tgm_file_path(ptr::null()).is_null());
4636    }
4637
4638    #[test]
4639    fn ffi_file_append_raw_round_trip() {
4640        let dir = std::env::temp_dir();
4641        let path = dir.join("ffi_test_append_raw.tgm");
4642        let _ = std::fs::remove_file(&path);
4643
4644        let c_path = CString::new(path.to_str().unwrap()).unwrap();
4645
4646        // Create
4647        let mut file: *mut super::TgmFile = ptr::null_mut();
4648        let err = super::tgm_file_create(c_path.as_ptr(), &mut file);
4649        assert!(matches!(err, super::TgmError::Ok));
4650
4651        // Encode a message in memory, then append raw bytes
4652        let encoded = ffi_encode_single_f32_tensor(&[1.0f32, 2.0], "");
4653        let err = super::tgm_file_append_raw(file, encoded.as_ptr(), encoded.len());
4654        assert!(matches!(err, super::TgmError::Ok));
4655
4656        // Count
4657        let mut count: usize = 0;
4658        let err = super::tgm_file_message_count(file, &mut count);
4659        assert!(matches!(err, super::TgmError::Ok));
4660        assert_eq!(count, 1);
4661
4662        super::tgm_file_close(file);
4663        let _ = std::fs::remove_file(&path);
4664    }
4665
4666    // ── tgm_streaming_encoder_* ──
4667
4668    #[test]
4669    fn ffi_streaming_encoder_round_trip() {
4670        let dir = std::env::temp_dir();
4671        let path = dir.join("ffi_streaming_test.tgm");
4672        let _ = std::fs::remove_file(&path);
4673
4674        let c_path = CString::new(path.to_str().unwrap()).unwrap();
4675        let meta_json = CString::new(r#"{"version":3}"#).unwrap();
4676
4677        // Create
4678        let mut enc: *mut super::TgmStreamingEncoder = ptr::null_mut();
4679        let err = super::tgm_streaming_encoder_create(
4680            c_path.as_ptr(),
4681            meta_json.as_ptr(),
4682            ptr::null(), // no hash
4683            0,           // threads
4684            &mut enc,
4685        );
4686        assert!(matches!(err, super::TgmError::Ok));
4687        assert!(!enc.is_null());
4688
4689        // Write an object
4690        let values = [100.0f32, 200.0, 300.0];
4691        let data: Vec<u8> = values.iter().flat_map(|v| v.to_ne_bytes()).collect();
4692        let desc_json = CString::new(format!(
4693            r#"{{"type":"ntensor","ndim":1,"shape":[{len}],"strides":[1],"dtype":"float32","byte_order":"{bo}","encoding":"none","filter":"none","compression":"none"}}"#,
4694            len = values.len(),
4695            bo = if cfg!(target_endian = "little") { "little" } else { "big" },
4696        )).unwrap();
4697
4698        let err =
4699            super::tgm_streaming_encoder_write(enc, desc_json.as_ptr(), data.as_ptr(), data.len());
4700        assert!(matches!(err, super::TgmError::Ok));
4701
4702        // Count
4703        assert_eq!(super::tgm_streaming_encoder_count(enc), 1);
4704
4705        // Finish
4706        let err = super::tgm_streaming_encoder_finish(enc);
4707        assert!(matches!(err, super::TgmError::Ok));
4708
4709        // Double finish should fail
4710        let err = super::tgm_streaming_encoder_finish(enc);
4711        assert!(matches!(err, super::TgmError::InvalidArg));
4712
4713        // Count after finish
4714        assert_eq!(super::tgm_streaming_encoder_count(enc), 0);
4715
4716        // Free
4717        super::tgm_streaming_encoder_free(enc);
4718
4719        // Read back and verify
4720        let mut file: *mut super::TgmFile = ptr::null_mut();
4721        let err = super::tgm_file_open(c_path.as_ptr(), &mut file);
4722        assert!(matches!(err, super::TgmError::Ok));
4723
4724        let mut count: usize = 0;
4725        let err = super::tgm_file_message_count(file, &mut count);
4726        assert!(matches!(err, super::TgmError::Ok));
4727        assert_eq!(count, 1);
4728
4729        let mut msg: *mut super::TgmMessage = ptr::null_mut();
4730        let err = super::tgm_file_decode_message(file, 0, 0, 0, 0, &mut msg);
4731        assert!(matches!(err, super::TgmError::Ok));
4732
4733        let mut data_len: usize = 0;
4734        let dp = super::tgm_object_data(msg, 0, &mut data_len);
4735        let decoded_bytes = unsafe { slice::from_raw_parts(dp, data_len) };
4736        let decoded_values: Vec<f32> = decoded_bytes
4737            .chunks_exact(4)
4738            .map(|c| f32::from_ne_bytes(c.try_into().unwrap()))
4739            .collect();
4740        assert_eq!(decoded_values, values);
4741
4742        super::tgm_message_free(msg);
4743        super::tgm_file_close(file);
4744        let _ = std::fs::remove_file(&path);
4745    }
4746
4747    #[test]
4748    fn ffi_streaming_encoder_null_args() {
4749        let mut enc: *mut super::TgmStreamingEncoder = ptr::null_mut();
4750
4751        // create with null path
4752        let meta = CString::new(r#"{"version":3}"#).unwrap();
4753        let err = super::tgm_streaming_encoder_create(
4754            ptr::null(),
4755            meta.as_ptr(),
4756            ptr::null(),
4757            0,
4758            &mut enc,
4759        );
4760        assert!(matches!(err, super::TgmError::InvalidArg));
4761
4762        // create with null metadata
4763        let p = CString::new("/tmp/dummy.tgm").unwrap();
4764        let err =
4765            super::tgm_streaming_encoder_create(p.as_ptr(), ptr::null(), ptr::null(), 0, &mut enc);
4766        assert!(matches!(err, super::TgmError::InvalidArg));
4767
4768        // create with null out
4769        let err = super::tgm_streaming_encoder_create(
4770            p.as_ptr(),
4771            meta.as_ptr(),
4772            ptr::null(),
4773            0,
4774            ptr::null_mut(),
4775        );
4776        assert!(matches!(err, super::TgmError::InvalidArg));
4777
4778        // write null enc
4779        let desc = CString::new(r#"{}"#).unwrap();
4780        let data = [0u8; 4];
4781        let err = super::tgm_streaming_encoder_write(
4782            ptr::null_mut(),
4783            desc.as_ptr(),
4784            data.as_ptr(),
4785            data.len(),
4786        );
4787        assert!(matches!(err, super::TgmError::InvalidArg));
4788
4789        // write null descriptor
4790        // Need a valid encoder for this — skip as it requires file creation
4791
4792        // finish null
4793        let err = super::tgm_streaming_encoder_finish(ptr::null_mut());
4794        assert!(matches!(err, super::TgmError::InvalidArg));
4795
4796        // count null
4797        assert_eq!(super::tgm_streaming_encoder_count(ptr::null()), 0);
4798
4799        // free null — should not crash
4800        super::tgm_streaming_encoder_free(ptr::null_mut());
4801    }
4802
4803    #[test]
4804    fn ffi_streaming_encoder_write_null_data() {
4805        let dir = std::env::temp_dir();
4806        let path = dir.join("ffi_streaming_null_data.tgm");
4807        let _ = std::fs::remove_file(&path);
4808
4809        let c_path = CString::new(path.to_str().unwrap()).unwrap();
4810        let meta_json = CString::new(r#"{"version":3}"#).unwrap();
4811
4812        let mut enc: *mut super::TgmStreamingEncoder = ptr::null_mut();
4813        let err = super::tgm_streaming_encoder_create(
4814            c_path.as_ptr(),
4815            meta_json.as_ptr(),
4816            ptr::null(),
4817            0,
4818            &mut enc,
4819        );
4820        assert!(matches!(err, super::TgmError::Ok));
4821
4822        let desc = CString::new(r#"{"type":"ntensor","ndim":1,"shape":[1],"strides":[1],"dtype":"float32","byte_order":"little","encoding":"none","filter":"none","compression":"none"}"#).unwrap();
4823        let err = super::tgm_streaming_encoder_write(enc, desc.as_ptr(), ptr::null(), 4);
4824        assert!(matches!(err, super::TgmError::InvalidArg));
4825
4826        // Write with null descriptor json
4827        let data = [0u8; 4];
4828        let err = super::tgm_streaming_encoder_write(enc, ptr::null(), data.as_ptr(), data.len());
4829        assert!(matches!(err, super::TgmError::InvalidArg));
4830
4831        super::tgm_streaming_encoder_free(enc);
4832        let _ = std::fs::remove_file(&path);
4833    }
4834
4835    #[test]
4836    fn ffi_streaming_encoder_with_preceder() {
4837        let dir = std::env::temp_dir();
4838        let path = dir.join("ffi_streaming_preceder.tgm");
4839        let _ = std::fs::remove_file(&path);
4840
4841        let c_path = CString::new(path.to_str().unwrap()).unwrap();
4842        let meta_json = CString::new(r#"{"version":3}"#).unwrap();
4843
4844        let mut enc: *mut super::TgmStreamingEncoder = ptr::null_mut();
4845        let err = super::tgm_streaming_encoder_create(
4846            c_path.as_ptr(),
4847            meta_json.as_ptr(),
4848            ptr::null(),
4849            0,
4850            &mut enc,
4851        );
4852        assert!(matches!(err, super::TgmError::Ok));
4853
4854        // Write preceder
4855        let preceder_json = CString::new(r#"{"param":"2t","source":"test"}"#).unwrap();
4856        let err = super::tgm_streaming_encoder_write_preceder(enc, preceder_json.as_ptr());
4857        assert!(matches!(err, super::TgmError::Ok));
4858
4859        // Write object after preceder
4860        let values = [42.0f32];
4861        let data: Vec<u8> = values.iter().flat_map(|v| v.to_ne_bytes()).collect();
4862        let desc_json = CString::new(format!(
4863            r#"{{"type":"ntensor","ndim":1,"shape":[1],"strides":[1],"dtype":"float32","byte_order":"{bo}","encoding":"none","filter":"none","compression":"none"}}"#,
4864            bo = if cfg!(target_endian = "little") { "little" } else { "big" },
4865        )).unwrap();
4866
4867        let err =
4868            super::tgm_streaming_encoder_write(enc, desc_json.as_ptr(), data.as_ptr(), data.len());
4869        assert!(matches!(err, super::TgmError::Ok));
4870
4871        let err = super::tgm_streaming_encoder_finish(enc);
4872        assert!(matches!(err, super::TgmError::Ok));
4873        super::tgm_streaming_encoder_free(enc);
4874
4875        // Re-open and verify the metadata contains the preceder keys
4876        let mut file: *mut super::TgmFile = ptr::null_mut();
4877        let err = super::tgm_file_open(c_path.as_ptr(), &mut file);
4878        assert!(matches!(err, super::TgmError::Ok));
4879
4880        let mut msg: *mut super::TgmMessage = ptr::null_mut();
4881        let err = super::tgm_file_decode_message(file, 0, 0, 0, 0, &mut msg);
4882        assert!(matches!(err, super::TgmError::Ok));
4883
4884        let mut meta: *mut super::TgmMetadata = ptr::null_mut();
4885        let err = super::tgm_message_metadata(msg, &mut meta);
4886        assert!(matches!(err, super::TgmError::Ok));
4887
4888        let key = CString::new("param").unwrap();
4889        let val_ptr = super::tgm_metadata_get_string(meta, key.as_ptr());
4890        assert!(!val_ptr.is_null());
4891        let val_str = unsafe { CStr::from_ptr(val_ptr) }.to_str().unwrap();
4892        assert_eq!(val_str, "2t");
4893
4894        super::tgm_metadata_free(meta);
4895        super::tgm_message_free(msg);
4896        super::tgm_file_close(file);
4897        let _ = std::fs::remove_file(&path);
4898    }
4899
4900    #[test]
4901    fn ffi_streaming_encoder_write_preceder_null_args() {
4902        let err = super::tgm_streaming_encoder_write_preceder(ptr::null_mut(), ptr::null());
4903        assert!(matches!(err, super::TgmError::InvalidArg));
4904    }
4905
4906    #[test]
4907    fn ffi_streaming_encoder_write_pre_encoded_null_args() {
4908        let err = super::tgm_streaming_encoder_write_pre_encoded(
4909            ptr::null_mut(),
4910            ptr::null(),
4911            ptr::null(),
4912            0,
4913        );
4914        assert!(matches!(err, super::TgmError::InvalidArg));
4915    }
4916
4917    // ── tgm_compute_hash ──
4918
4919    #[test]
4920    fn ffi_compute_hash_xxh3() {
4921        let data = b"hello world";
4922        let mut out = super::TgmBytes {
4923            data: ptr::null_mut(),
4924            len: 0,
4925        };
4926        let err = super::tgm_compute_hash(
4927            data.as_ptr(),
4928            data.len(),
4929            ptr::null(), // default = xxh3
4930            &mut out,
4931        );
4932        assert!(matches!(err, super::TgmError::Ok));
4933        assert!(!out.data.is_null());
4934        assert!(out.len > 0);
4935
4936        let hex = unsafe { std::str::from_utf8(slice::from_raw_parts(out.data, out.len)).unwrap() };
4937        // xxh3 produces a hex string
4938        assert!(!hex.is_empty());
4939        assert!(hex.chars().all(|c| c.is_ascii_hexdigit()));
4940
4941        super::tgm_bytes_free(out);
4942    }
4943
4944    #[test]
4945    fn ffi_compute_hash_explicit_xxh3() {
4946        let data = b"test data";
4947        let algo = CString::new("xxh3").unwrap();
4948        let mut out = super::TgmBytes {
4949            data: ptr::null_mut(),
4950            len: 0,
4951        };
4952        let err = super::tgm_compute_hash(data.as_ptr(), data.len(), algo.as_ptr(), &mut out);
4953        assert!(matches!(err, super::TgmError::Ok));
4954        assert!(out.len > 0);
4955        super::tgm_bytes_free(out);
4956    }
4957
4958    #[test]
4959    fn ffi_compute_hash_null_data() {
4960        let mut out = super::TgmBytes {
4961            data: ptr::null_mut(),
4962            len: 0,
4963        };
4964        let err = super::tgm_compute_hash(ptr::null(), 0, ptr::null(), &mut out);
4965        assert!(matches!(err, super::TgmError::InvalidArg));
4966    }
4967
4968    #[test]
4969    fn ffi_compute_hash_null_out() {
4970        let data = b"hello";
4971        let err = super::tgm_compute_hash(data.as_ptr(), data.len(), ptr::null(), ptr::null_mut());
4972        assert!(matches!(err, super::TgmError::InvalidArg));
4973    }
4974
4975    #[test]
4976    fn ffi_compute_hash_invalid_algo() {
4977        let data = b"hello";
4978        let algo = CString::new("bogus_algo").unwrap();
4979        let mut out = super::TgmBytes {
4980            data: ptr::null_mut(),
4981            len: 0,
4982        };
4983        let err = super::tgm_compute_hash(data.as_ptr(), data.len(), algo.as_ptr(), &mut out);
4984        assert!(matches!(err, super::TgmError::InvalidArg));
4985    }
4986
4987    // ── tgm_simple_packing_compute_params ──
4988
4989    #[test]
4990    fn ffi_simple_packing_compute_params() {
4991        let values = [100.0f64, 200.0, 300.0, 400.0];
4992        let mut ref_val: f64 = 0.0;
4993        let mut bin_scale: i32 = 0;
4994        let err = super::tgm_simple_packing_compute_params(
4995            values.as_ptr(),
4996            values.len(),
4997            16,
4998            0,
4999            &mut ref_val,
5000            &mut bin_scale,
5001        );
5002        assert!(matches!(err, super::TgmError::Ok));
5003        // Reference value should be <= min(values)
5004        assert!(ref_val <= 100.0);
5005    }
5006
5007    #[test]
5008    fn ffi_simple_packing_compute_params_null_args() {
5009        let mut ref_val: f64 = 0.0;
5010        let mut bin_scale: i32 = 0;
5011
5012        // null values
5013        let err = super::tgm_simple_packing_compute_params(
5014            ptr::null(),
5015            0,
5016            16,
5017            0,
5018            &mut ref_val,
5019            &mut bin_scale,
5020        );
5021        assert!(matches!(err, super::TgmError::InvalidArg));
5022
5023        // null out_reference_value
5024        let values = [1.0f64];
5025        let err = super::tgm_simple_packing_compute_params(
5026            values.as_ptr(),
5027            values.len(),
5028            16,
5029            0,
5030            ptr::null_mut(),
5031            &mut bin_scale,
5032        );
5033        assert!(matches!(err, super::TgmError::InvalidArg));
5034
5035        // null out_binary_scale_factor
5036        let err = super::tgm_simple_packing_compute_params(
5037            values.as_ptr(),
5038            values.len(),
5039            16,
5040            0,
5041            &mut ref_val,
5042            ptr::null_mut(),
5043        );
5044        assert!(matches!(err, super::TgmError::InvalidArg));
5045    }
5046
5047    // ── tgm_last_error ──
5048
5049    #[test]
5050    fn ffi_last_error_after_success() {
5051        // After a successful encode, last_error should remain from whatever was
5052        // set before (or NULL). We don't clear on success.
5053        let values = [1.0f32];
5054        let _ = ffi_encode_single_f32_tensor(&values, "");
5055        // No crash, test passes
5056    }
5057
5058    #[test]
5059    fn ffi_last_error_after_failure() {
5060        // Trigger an error
5061        let mut out = super::TgmBytes {
5062            data: ptr::null_mut(),
5063            len: 0,
5064        };
5065        let _ = super::tgm_encode(
5066            ptr::null(),
5067            ptr::null(),
5068            ptr::null(),
5069            0,
5070            ptr::null(),
5071            0,
5072            &mut out,
5073        );
5074
5075        let err_ptr = super::tgm_last_error();
5076        assert!(!err_ptr.is_null());
5077        let err_str = unsafe { CStr::from_ptr(err_ptr) }.to_str().unwrap();
5078        assert!(err_str.contains("null"));
5079    }
5080
5081    // ── tgm_error_string ──
5082
5083    #[test]
5084    fn ffi_error_string_all_variants() {
5085        let check = |err: super::TgmError, expected: &str| {
5086            let ptr = super::tgm_error_string(err);
5087            assert!(!ptr.is_null());
5088            let s = unsafe { CStr::from_ptr(ptr) }.to_str().unwrap();
5089            assert_eq!(s, expected);
5090        };
5091
5092        check(super::TgmError::Ok, "ok");
5093        check(super::TgmError::Framing, "framing error");
5094        check(super::TgmError::Metadata, "metadata error");
5095        check(super::TgmError::Encoding, "encoding error");
5096        check(super::TgmError::Compression, "compression error");
5097        check(super::TgmError::Object, "object error");
5098        check(super::TgmError::Io, "I/O error");
5099        check(super::TgmError::HashMismatch, "hash mismatch");
5100        check(super::TgmError::InvalidArg, "invalid argument");
5101        check(super::TgmError::EndOfIter, "end of iteration");
5102        check(super::TgmError::Remote, "remote error");
5103    }
5104
5105    // ── tgm_bytes_free safety ──
5106
5107    #[test]
5108    fn ffi_bytes_free_null_data() {
5109        let buf = super::TgmBytes {
5110            data: ptr::null_mut(),
5111            len: 0,
5112        };
5113        super::tgm_bytes_free(buf); // should not crash
5114    }
5115
5116    // ── tgm_message_free / tgm_metadata_free / tgm_scan_free null safety ──
5117
5118    #[test]
5119    fn ffi_free_null_handles() {
5120        super::tgm_message_free(ptr::null_mut());
5121        super::tgm_metadata_free(ptr::null_mut());
5122        super::tgm_scan_free(ptr::null_mut());
5123        super::tgm_file_close(ptr::null_mut());
5124        super::tgm_streaming_encoder_free(ptr::null_mut());
5125        // Should all be no-ops, no crash
5126    }
5127
5128    // ── tgm_encode_pre_encoded ──
5129
5130    #[test]
5131    fn ffi_encode_pre_encoded_null_args() {
5132        let mut out = super::TgmBytes {
5133            data: ptr::null_mut(),
5134            len: 0,
5135        };
5136        let err = super::tgm_encode_pre_encoded(
5137            ptr::null(),
5138            ptr::null(),
5139            ptr::null(),
5140            0,
5141            ptr::null(),
5142            0,
5143            &mut out,
5144        );
5145        assert!(matches!(err, super::TgmError::InvalidArg));
5146
5147        let json = CString::new(r#"{"version":3,"descriptors":[]}"#).unwrap();
5148        let err = super::tgm_encode_pre_encoded(
5149            json.as_ptr(),
5150            ptr::null(),
5151            ptr::null(),
5152            0,
5153            ptr::null(),
5154            0,
5155            ptr::null_mut(),
5156        );
5157        assert!(matches!(err, super::TgmError::InvalidArg));
5158    }
5159
5160    #[test]
5161    fn ffi_encode_pre_encoded_round_trip() {
5162        // Encode with pre_encoded: for "none" encoding, the raw bytes are the payload
5163        let values = [5.0f32, 6.0, 7.0];
5164        let data: Vec<u8> = values.iter().flat_map(|v| v.to_ne_bytes()).collect();
5165
5166        let json = format!(
5167            r#"{{"version":3,"descriptors":[{{"type":"ntensor","ndim":1,"shape":[{len}],"strides":[1],"dtype":"float32","byte_order":"{bo}","encoding":"none","filter":"none","compression":"none"}}]}}"#,
5168            len = values.len(),
5169            bo = if cfg!(target_endian = "little") {
5170                "little"
5171            } else {
5172                "big"
5173            },
5174        );
5175        let c_json = CString::new(json).unwrap();
5176        let data_ptr: *const u8 = data.as_ptr();
5177        let data_len: usize = data.len();
5178
5179        let mut out = super::TgmBytes {
5180            data: ptr::null_mut(),
5181            len: 0,
5182        };
5183        let err = super::tgm_encode_pre_encoded(
5184            c_json.as_ptr(),
5185            &data_ptr as *const *const u8,
5186            &data_len as *const usize,
5187            1,
5188            ptr::null(),
5189            0,
5190            &mut out,
5191        );
5192        assert!(matches!(err, super::TgmError::Ok));
5193
5194        let encoded = unsafe { slice::from_raw_parts(out.data, out.len) }.to_vec();
5195        super::tgm_bytes_free(out);
5196
5197        // Decode
5198        let mut msg: *mut super::TgmMessage = ptr::null_mut();
5199        let err = super::tgm_decode(encoded.as_ptr(), encoded.len(), 0, 0, 0, &mut msg);
5200        assert!(matches!(err, super::TgmError::Ok));
5201
5202        let mut dl: usize = 0;
5203        let dp = super::tgm_object_data(msg, 0, &mut dl);
5204        let decoded_bytes = unsafe { slice::from_raw_parts(dp, dl) };
5205        let decoded_values: Vec<f32> = decoded_bytes
5206            .chunks_exact(4)
5207            .map(|c| f32::from_ne_bytes(c.try_into().unwrap()))
5208            .collect();
5209        assert_eq!(decoded_values, values);
5210
5211        super::tgm_message_free(msg);
5212    }
5213
5214    // ── tgm_buffer_iter_* ──
5215
5216    #[test]
5217    fn ffi_buffer_iter_round_trip() {
5218        let msg1 = ffi_encode_single_f32_tensor(&[1.0f32], "");
5219        let msg2 = ffi_encode_single_f32_tensor(&[2.0f32], "");
5220        let mut concat = msg1.clone();
5221        concat.extend_from_slice(&msg2);
5222
5223        let mut iter: *mut super::TgmBufferIter = ptr::null_mut();
5224        let err = super::tgm_buffer_iter_create(concat.as_ptr(), concat.len(), &mut iter);
5225        assert!(matches!(err, super::TgmError::Ok));
5226        assert!(!iter.is_null());
5227
5228        assert_eq!(super::tgm_buffer_iter_count(iter), 2);
5229
5230        // First message
5231        let mut out_buf: *const u8 = ptr::null();
5232        let mut out_len: usize = 0;
5233        let err = super::tgm_buffer_iter_next(iter, &mut out_buf, &mut out_len);
5234        assert!(matches!(err, super::TgmError::Ok));
5235        assert!(!out_buf.is_null());
5236        assert_eq!(out_len, msg1.len());
5237
5238        // Second message
5239        let err = super::tgm_buffer_iter_next(iter, &mut out_buf, &mut out_len);
5240        assert!(matches!(err, super::TgmError::Ok));
5241        assert_eq!(out_len, msg2.len());
5242
5243        // End of iteration
5244        let err = super::tgm_buffer_iter_next(iter, &mut out_buf, &mut out_len);
5245        assert!(matches!(err, super::TgmError::EndOfIter));
5246
5247        super::tgm_buffer_iter_free(iter);
5248    }
5249
5250    #[test]
5251    fn ffi_buffer_iter_null_args() {
5252        let mut iter: *mut super::TgmBufferIter = ptr::null_mut();
5253        let err = super::tgm_buffer_iter_create(ptr::null(), 0, &mut iter);
5254        assert!(matches!(err, super::TgmError::InvalidArg));
5255
5256        let data = [0u8; 10];
5257        let err = super::tgm_buffer_iter_create(data.as_ptr(), data.len(), ptr::null_mut());
5258        assert!(matches!(err, super::TgmError::InvalidArg));
5259
5260        // next with null iter
5261        let mut out_buf: *const u8 = ptr::null();
5262        let mut out_len: usize = 0;
5263        let err = super::tgm_buffer_iter_next(ptr::null_mut(), &mut out_buf, &mut out_len);
5264        assert!(matches!(err, super::TgmError::InvalidArg));
5265
5266        // next with null out_buf
5267        // We need a valid iter for this, but let's test the easy null cases
5268        let err = super::tgm_buffer_iter_next(ptr::null_mut(), ptr::null_mut(), &mut out_len);
5269        assert!(matches!(err, super::TgmError::InvalidArg));
5270
5271        // count null
5272        assert_eq!(super::tgm_buffer_iter_count(ptr::null()), 0);
5273
5274        // free null — no crash
5275        super::tgm_buffer_iter_free(ptr::null_mut());
5276    }
5277
5278    // ── tgm_object_iter_* ──
5279
5280    #[test]
5281    fn ffi_object_iter_round_trip() {
5282        let encoded = ffi_encode_single_f32_tensor(&[10.0f32, 20.0], "");
5283
5284        let mut iter: *mut super::TgmObjectIter = ptr::null_mut();
5285        let err = super::tgm_object_iter_create(
5286            encoded.as_ptr(),
5287            encoded.len(),
5288            0, // no native byte order
5289            0, // verify_hash
5290            &mut iter,
5291        );
5292        assert!(matches!(err, super::TgmError::Ok));
5293        assert!(!iter.is_null());
5294
5295        // Get the single object
5296        let mut msg: *mut super::TgmMessage = ptr::null_mut();
5297        let err = super::tgm_object_iter_next(iter, &mut msg);
5298        assert!(matches!(err, super::TgmError::Ok));
5299        assert!(!msg.is_null());
5300
5301        assert_eq!(super::tgm_message_num_objects(msg), 1);
5302        assert_eq!(super::tgm_message_version(msg), 3);
5303
5304        let mut data_len: usize = 0;
5305        let dp = super::tgm_object_data(msg, 0, &mut data_len);
5306        assert!(!dp.is_null());
5307        assert_eq!(data_len, 8); // 2 × f32
5308
5309        super::tgm_message_free(msg);
5310
5311        // Iteration should be exhausted
5312        let mut msg2: *mut super::TgmMessage = ptr::null_mut();
5313        let err = super::tgm_object_iter_next(iter, &mut msg2);
5314        assert!(matches!(err, super::TgmError::EndOfIter));
5315
5316        super::tgm_object_iter_free(iter);
5317    }
5318
5319    #[test]
5320    fn ffi_object_iter_null_args() {
5321        let mut iter: *mut super::TgmObjectIter = ptr::null_mut();
5322        let err = super::tgm_object_iter_create(ptr::null(), 0, 0, 0, &mut iter);
5323        assert!(matches!(err, super::TgmError::InvalidArg));
5324
5325        let data = [0u8; 10];
5326        let err = super::tgm_object_iter_create(data.as_ptr(), data.len(), 0, 0, ptr::null_mut());
5327        assert!(matches!(err, super::TgmError::InvalidArg));
5328
5329        // next null iter
5330        let mut msg: *mut super::TgmMessage = ptr::null_mut();
5331        let err = super::tgm_object_iter_next(ptr::null_mut(), &mut msg);
5332        assert!(matches!(err, super::TgmError::InvalidArg));
5333
5334        // next null out
5335        let err = super::tgm_object_iter_next(ptr::null_mut(), ptr::null_mut());
5336        assert!(matches!(err, super::TgmError::InvalidArg));
5337
5338        // free null — no crash
5339        super::tgm_object_iter_free(ptr::null_mut());
5340    }
5341
5342    // ── tgm_file_iter_* ──
5343
5344    #[test]
5345    fn ffi_file_iter_round_trip() {
5346        let dir = std::env::temp_dir();
5347        let path = dir.join("ffi_file_iter_test.tgm");
5348        let _ = std::fs::remove_file(&path);
5349
5350        let c_path = CString::new(path.to_str().unwrap()).unwrap();
5351
5352        // Create file with 2 messages
5353        let mut file: *mut super::TgmFile = ptr::null_mut();
5354        let err = super::tgm_file_create(c_path.as_ptr(), &mut file);
5355        assert!(matches!(err, super::TgmError::Ok));
5356
5357        let msg1 = ffi_encode_single_f32_tensor(&[1.0f32], "");
5358        let msg2 = ffi_encode_single_f32_tensor(&[2.0f32], "");
5359        let err = super::tgm_file_append_raw(file, msg1.as_ptr(), msg1.len());
5360        assert!(matches!(err, super::TgmError::Ok));
5361        let err = super::tgm_file_append_raw(file, msg2.as_ptr(), msg2.len());
5362        assert!(matches!(err, super::TgmError::Ok));
5363
5364        // Create iterator
5365        let mut iter: *mut super::TgmFileIter = ptr::null_mut();
5366        let err = super::tgm_file_iter_create(file, &mut iter);
5367        assert!(matches!(err, super::TgmError::Ok));
5368        assert!(!iter.is_null());
5369
5370        // First message
5371        let mut out = super::TgmBytes {
5372            data: ptr::null_mut(),
5373            len: 0,
5374        };
5375        let err = super::tgm_file_iter_next(iter, &mut out);
5376        assert!(matches!(err, super::TgmError::Ok));
5377        assert!(!out.data.is_null());
5378        assert_eq!(out.len, msg1.len());
5379        super::tgm_bytes_free(out);
5380
5381        // Second message
5382        let mut out2 = super::TgmBytes {
5383            data: ptr::null_mut(),
5384            len: 0,
5385        };
5386        let err = super::tgm_file_iter_next(iter, &mut out2);
5387        assert!(matches!(err, super::TgmError::Ok));
5388        super::tgm_bytes_free(out2);
5389
5390        // End
5391        let mut out3 = super::TgmBytes {
5392            data: ptr::null_mut(),
5393            len: 0,
5394        };
5395        let err = super::tgm_file_iter_next(iter, &mut out3);
5396        assert!(matches!(err, super::TgmError::EndOfIter));
5397
5398        super::tgm_file_iter_free(iter);
5399        super::tgm_file_close(file);
5400        let _ = std::fs::remove_file(&path);
5401    }
5402
5403    #[test]
5404    fn ffi_file_iter_null_args() {
5405        let mut iter: *mut super::TgmFileIter = ptr::null_mut();
5406        let err = super::tgm_file_iter_create(ptr::null_mut(), &mut iter);
5407        assert!(matches!(err, super::TgmError::InvalidArg));
5408
5409        let err = super::tgm_file_iter_create(ptr::null_mut(), ptr::null_mut());
5410        assert!(matches!(err, super::TgmError::InvalidArg));
5411
5412        // next null
5413        let mut out = super::TgmBytes {
5414            data: ptr::null_mut(),
5415            len: 0,
5416        };
5417        let err = super::tgm_file_iter_next(ptr::null_mut(), &mut out);
5418        assert!(matches!(err, super::TgmError::InvalidArg));
5419
5420        let err = super::tgm_file_iter_next(ptr::null_mut(), ptr::null_mut());
5421        assert!(matches!(err, super::TgmError::InvalidArg));
5422
5423        // free null — no crash
5424        super::tgm_file_iter_free(ptr::null_mut());
5425    }
5426
5427    // ── tgm_encode zero objects (metadata-only message) ──
5428
5429    #[test]
5430    fn ffi_encode_decode_zero_objects() {
5431        let json = CString::new(r#"{"version":3,"descriptors":[],"source":"empty"}"#).unwrap();
5432        let mut out = super::TgmBytes {
5433            data: ptr::null_mut(),
5434            len: 0,
5435        };
5436        let err = super::tgm_encode(
5437            json.as_ptr(),
5438            ptr::null(),
5439            ptr::null(),
5440            0,
5441            ptr::null(),
5442            0,
5443            &mut out,
5444        );
5445        assert!(matches!(err, super::TgmError::Ok));
5446
5447        let encoded = unsafe { slice::from_raw_parts(out.data, out.len) }.to_vec();
5448        super::tgm_bytes_free(out);
5449
5450        let mut msg: *mut super::TgmMessage = ptr::null_mut();
5451        let err = super::tgm_decode(encoded.as_ptr(), encoded.len(), 0, 0, 0, &mut msg);
5452        assert!(matches!(err, super::TgmError::Ok));
5453
5454        assert_eq!(super::tgm_message_version(msg), 3);
5455        assert_eq!(super::tgm_message_num_objects(msg), 0);
5456
5457        super::tgm_message_free(msg);
5458    }
5459
5460    // ── tgm_streaming_encoder with hash ──
5461
5462    /// End-to-end: streaming-encode a hashed message, read the file
5463    /// back as bytes, decode via the buffer-based `tgm_decode`, and
5464    /// confirm the inline-hash slot is surfaced through the
5465    /// `tgm_payload_has_hash` / `tgm_object_hash_*` accessors.
5466    ///
5467    /// As of pass 5 the file-based decode path
5468    /// (`tgm_file_decode_message`) also surfaces inline hashes
5469    /// via a re-read of the raw message bytes — see
5470    /// `ffi_file_decode_surfaces_inline_hash`.
5471    #[test]
5472    fn ffi_streaming_encoder_with_hash() {
5473        let dir = std::env::temp_dir();
5474        let path = dir.join("ffi_streaming_hash.tgm");
5475        let _ = std::fs::remove_file(&path);
5476
5477        let c_path = CString::new(path.to_str().unwrap()).unwrap();
5478        let meta_json = CString::new(r#"{"version":3}"#).unwrap();
5479        let hash_algo = CString::new("xxh3").unwrap();
5480
5481        let mut enc: *mut super::TgmStreamingEncoder = ptr::null_mut();
5482        let err = super::tgm_streaming_encoder_create(
5483            c_path.as_ptr(),
5484            meta_json.as_ptr(),
5485            hash_algo.as_ptr(),
5486            0,
5487            &mut enc,
5488        );
5489        assert!(matches!(err, super::TgmError::Ok));
5490
5491        let values = [1.0f32, 2.0];
5492        let data: Vec<u8> = values.iter().flat_map(|v| v.to_ne_bytes()).collect();
5493        let desc_json = CString::new(format!(
5494            r#"{{"type":"ntensor","ndim":1,"shape":[{len}],"strides":[1],"dtype":"float32","byte_order":"{bo}","encoding":"none","filter":"none","compression":"none"}}"#,
5495            len = values.len(),
5496            bo = if cfg!(target_endian = "little") { "little" } else { "big" },
5497        )).unwrap();
5498
5499        let err =
5500            super::tgm_streaming_encoder_write(enc, desc_json.as_ptr(), data.as_ptr(), data.len());
5501        assert!(matches!(err, super::TgmError::Ok));
5502
5503        let err = super::tgm_streaming_encoder_finish(enc);
5504        assert!(matches!(err, super::TgmError::Ok));
5505        super::tgm_streaming_encoder_free(enc);
5506
5507        // Read back via the buffer-based decode path so inline
5508        // hashes are extracted.
5509        let bytes = std::fs::read(&path).expect("read file");
5510        let mut msg: *mut super::TgmMessage = ptr::null_mut();
5511        let err = super::tgm_decode(bytes.as_ptr(), bytes.len(), 0, 0, 0, &mut msg);
5512        assert!(matches!(err, super::TgmError::Ok));
5513
5514        assert_eq!(super::tgm_payload_has_hash(msg, 0), 1);
5515        let ht = unsafe { CStr::from_ptr(super::tgm_object_hash_type(msg, 0)) }
5516            .to_str()
5517            .unwrap();
5518        assert_eq!(ht, "xxh3");
5519        let hv = unsafe { CStr::from_ptr(super::tgm_object_hash_value(msg, 0)) }
5520            .to_str()
5521            .unwrap();
5522        assert_eq!(hv.len(), 16, "xxh3 digest is 16 hex chars");
5523
5524        super::tgm_message_free(msg);
5525        let _ = std::fs::remove_file(&path);
5526    }
5527
5528    /// Pass-5 cross-language parity: the file-based decode path
5529    /// (`tgm_file_decode_message`) surfaces the same inline hash
5530    /// as the buffer-based path (`tgm_decode`) for the same
5531    /// underlying bytes.  Pins the symmetry added by the
5532    /// `read_message` + `data_object_inline_hashes` two-step
5533    /// inside the FFI file decoder.
5534    #[test]
5535    fn ffi_file_decode_surfaces_inline_hash() {
5536        // Encode a hashed message into a temp file via the streaming
5537        // encoder, then open it with tgm_file_open + decode via
5538        // tgm_file_decode_message and confirm the hash accessors
5539        // report the same digest that tgm_decode surfaces.
5540        let dir = std::env::temp_dir();
5541        let path = dir.join("ffi_file_hash.tgm");
5542        let _ = std::fs::remove_file(&path);
5543
5544        let c_path = CString::new(path.to_str().unwrap()).unwrap();
5545        let meta_json = CString::new(r#"{"version":3}"#).unwrap();
5546        let hash_algo = CString::new("xxh3").unwrap();
5547        let mut enc: *mut super::TgmStreamingEncoder = ptr::null_mut();
5548        assert!(matches!(
5549            super::tgm_streaming_encoder_create(
5550                c_path.as_ptr(),
5551                meta_json.as_ptr(),
5552                hash_algo.as_ptr(),
5553                0,
5554                &mut enc,
5555            ),
5556            super::TgmError::Ok
5557        ));
5558        let values = [1.0f32, 2.0, 3.0];
5559        let data: Vec<u8> = values.iter().flat_map(|v| v.to_ne_bytes()).collect();
5560        let desc_json = CString::new(format!(
5561            r#"{{"type":"ntensor","ndim":1,"shape":[{}],"strides":[1],"dtype":"float32","byte_order":"{}","encoding":"none","filter":"none","compression":"none"}}"#,
5562            values.len(),
5563            if cfg!(target_endian = "little") { "little" } else { "big" },
5564        )).unwrap();
5565        assert!(matches!(
5566            super::tgm_streaming_encoder_write(enc, desc_json.as_ptr(), data.as_ptr(), data.len(),),
5567            super::TgmError::Ok
5568        ));
5569        assert!(matches!(
5570            super::tgm_streaming_encoder_finish(enc),
5571            super::TgmError::Ok
5572        ));
5573        super::tgm_streaming_encoder_free(enc);
5574
5575        // File path: tgm_file_open + tgm_file_decode_message.
5576        let mut file: *mut super::TgmFile = ptr::null_mut();
5577        assert!(matches!(
5578            super::tgm_file_open(c_path.as_ptr(), &mut file),
5579            super::TgmError::Ok
5580        ));
5581        let mut file_msg: *mut super::TgmMessage = ptr::null_mut();
5582        assert!(matches!(
5583            super::tgm_file_decode_message(file, 0, 0, 0, 0, &mut file_msg),
5584            super::TgmError::Ok
5585        ));
5586        let file_has = super::tgm_payload_has_hash(file_msg, 0);
5587        let file_hv_ptr = super::tgm_object_hash_value(file_msg, 0);
5588        assert_eq!(file_has, 1);
5589        assert!(!file_hv_ptr.is_null());
5590        let file_hv = unsafe { CStr::from_ptr(file_hv_ptr) }
5591            .to_str()
5592            .unwrap()
5593            .to_string();
5594
5595        // Buffer path: same file, read into memory, tgm_decode.
5596        let bytes = std::fs::read(&path).unwrap();
5597        let mut buf_msg: *mut super::TgmMessage = ptr::null_mut();
5598        assert!(matches!(
5599            super::tgm_decode(bytes.as_ptr(), bytes.len(), 0, 0, 0, &mut buf_msg),
5600            super::TgmError::Ok
5601        ));
5602        let buf_hv_ptr = super::tgm_object_hash_value(buf_msg, 0);
5603        let buf_hv = unsafe { CStr::from_ptr(buf_hv_ptr) }
5604            .to_str()
5605            .unwrap()
5606            .to_string();
5607
5608        assert_eq!(
5609            file_hv, buf_hv,
5610            "file-path and buffer-path hash values must agree"
5611        );
5612        assert_eq!(file_hv.len(), 16, "xxh3 digest is 16 hex chars");
5613
5614        super::tgm_message_free(file_msg);
5615        super::tgm_message_free(buf_msg);
5616        super::tgm_file_close(file);
5617        let _ = std::fs::remove_file(&path);
5618    }
5619
5620    // ── tgm_streaming_encoder_create with invalid hash algo ──
5621
5622    #[test]
5623    fn ffi_streaming_encoder_invalid_hash_algo() {
5624        let dir = std::env::temp_dir();
5625        let path = dir.join("ffi_streaming_bad_hash.tgm");
5626        let c_path = CString::new(path.to_str().unwrap()).unwrap();
5627        let meta_json = CString::new(r#"{"version":3}"#).unwrap();
5628        let bad_algo = CString::new("bogus_hash").unwrap();
5629
5630        let mut enc: *mut super::TgmStreamingEncoder = ptr::null_mut();
5631        let err = super::tgm_streaming_encoder_create(
5632            c_path.as_ptr(),
5633            meta_json.as_ptr(),
5634            bad_algo.as_ptr(),
5635            0,
5636            &mut enc,
5637        );
5638        assert!(matches!(err, super::TgmError::InvalidArg));
5639        let _ = std::fs::remove_file(&path);
5640    }
5641
5642    // ── tgm_streaming_encoder_create with invalid metadata JSON ──
5643
5644    #[test]
5645    fn ffi_streaming_encoder_invalid_metadata() {
5646        let dir = std::env::temp_dir();
5647        let path = dir.join("ffi_streaming_bad_meta.tgm");
5648        let c_path = CString::new(path.to_str().unwrap()).unwrap();
5649        let bad_meta = CString::new("not json").unwrap();
5650
5651        let mut enc: *mut super::TgmStreamingEncoder = ptr::null_mut();
5652        let err = super::tgm_streaming_encoder_create(
5653            c_path.as_ptr(),
5654            bad_meta.as_ptr(),
5655            ptr::null(),
5656            0,
5657            &mut enc,
5658        );
5659        assert!(matches!(err, super::TgmError::Metadata));
5660        let _ = std::fs::remove_file(&path);
5661    }
5662
5663    // ── Multiple objects encode/decode ──
5664
5665    #[test]
5666    fn ffi_encode_decode_multiple_objects() {
5667        let vals1 = [1.0f32, 2.0];
5668        let vals2 = [10.0f32, 20.0, 30.0];
5669        let bo = if cfg!(target_endian = "little") {
5670            "little"
5671        } else {
5672            "big"
5673        };
5674
5675        let json = format!(
5676            r#"{{"version":3,"descriptors":[{{"type":"ntensor","ndim":1,"shape":[{len1}],"strides":[1],"dtype":"float32","byte_order":"{bo}","encoding":"none","filter":"none","compression":"none"}},{{"type":"ntensor","ndim":1,"shape":[{len2}],"strides":[1],"dtype":"float32","byte_order":"{bo}","encoding":"none","filter":"none","compression":"none"}}]}}"#,
5677            len1 = vals1.len(),
5678            len2 = vals2.len(),
5679            bo = bo,
5680        );
5681
5682        let data1: Vec<u8> = vals1.iter().flat_map(|v| v.to_ne_bytes()).collect();
5683        let data2: Vec<u8> = vals2.iter().flat_map(|v| v.to_ne_bytes()).collect();
5684
5685        let c_json = CString::new(json).unwrap();
5686        let data_ptrs: [*const u8; 2] = [data1.as_ptr(), data2.as_ptr()];
5687        let data_lens: [usize; 2] = [data1.len(), data2.len()];
5688
5689        let mut out = super::TgmBytes {
5690            data: ptr::null_mut(),
5691            len: 0,
5692        };
5693        let err = super::tgm_encode(
5694            c_json.as_ptr(),
5695            data_ptrs.as_ptr(),
5696            data_lens.as_ptr(),
5697            2,
5698            ptr::null(),
5699            0,
5700            &mut out,
5701        );
5702        assert!(matches!(err, super::TgmError::Ok));
5703
5704        let encoded = unsafe { slice::from_raw_parts(out.data, out.len) }.to_vec();
5705        super::tgm_bytes_free(out);
5706
5707        // Decode all
5708        let mut msg: *mut super::TgmMessage = ptr::null_mut();
5709        let err = super::tgm_decode(encoded.as_ptr(), encoded.len(), 0, 0, 0, &mut msg);
5710        assert!(matches!(err, super::TgmError::Ok));
5711        assert_eq!(super::tgm_message_num_objects(msg), 2);
5712
5713        // Object 0
5714        let mut dl: usize = 0;
5715        let dp = super::tgm_object_data(msg, 0, &mut dl);
5716        let decoded0: Vec<f32> = unsafe { slice::from_raw_parts(dp, dl) }
5717            .chunks_exact(4)
5718            .map(|c| f32::from_ne_bytes(c.try_into().unwrap()))
5719            .collect();
5720        assert_eq!(decoded0, vals1);
5721
5722        // Object 1
5723        let dp1 = super::tgm_object_data(msg, 1, &mut dl);
5724        let decoded1: Vec<f32> = unsafe { slice::from_raw_parts(dp1, dl) }
5725            .chunks_exact(4)
5726            .map(|c| f32::from_ne_bytes(c.try_into().unwrap()))
5727            .collect();
5728        assert_eq!(decoded1, vals2);
5729
5730        // Shape check
5731        let shape0 = super::tgm_object_shape(msg, 0);
5732        assert_eq!(unsafe { *shape0 }, vals1.len() as u64);
5733        let shape1 = super::tgm_object_shape(msg, 1);
5734        assert_eq!(unsafe { *shape1 }, vals2.len() as u64);
5735
5736        super::tgm_message_free(msg);
5737    }
5738
5739    // ── tgm_encode with base metadata ──
5740
5741    #[test]
5742    fn ffi_encode_decode_with_base_metadata() {
5743        let bo = if cfg!(target_endian = "little") {
5744            "little"
5745        } else {
5746            "big"
5747        };
5748        let json = format!(
5749            r#"{{"version":3,"base":[{{"param":"2t","level":"surface"}}],"descriptors":[{{"type":"ntensor","ndim":1,"shape":[2],"strides":[1],"dtype":"float32","byte_order":"{bo}","encoding":"none","filter":"none","compression":"none"}}]}}"#,
5750            bo = bo,
5751        );
5752
5753        let data: Vec<u8> = [1.0f32, 2.0].iter().flat_map(|v| v.to_ne_bytes()).collect();
5754        let c_json = CString::new(json).unwrap();
5755        let data_ptr: *const u8 = data.as_ptr();
5756        let data_len: usize = data.len();
5757
5758        let mut out = super::TgmBytes {
5759            data: ptr::null_mut(),
5760            len: 0,
5761        };
5762        let err = super::tgm_encode(
5763            c_json.as_ptr(),
5764            &data_ptr as *const *const u8,
5765            &data_len as *const usize,
5766            1,
5767            ptr::null(),
5768            0,
5769            &mut out,
5770        );
5771        assert!(matches!(err, super::TgmError::Ok));
5772
5773        let encoded = unsafe { slice::from_raw_parts(out.data, out.len) }.to_vec();
5774        super::tgm_bytes_free(out);
5775
5776        // Decode metadata and check base keys
5777        let mut meta: *mut super::TgmMetadata = ptr::null_mut();
5778        let err = super::tgm_decode_metadata(encoded.as_ptr(), encoded.len(), &mut meta);
5779        assert!(matches!(err, super::TgmError::Ok));
5780
5781        let key = CString::new("param").unwrap();
5782        let val_ptr = super::tgm_metadata_get_string(meta, key.as_ptr());
5783        assert!(!val_ptr.is_null());
5784        let val_str = unsafe { CStr::from_ptr(val_ptr) }.to_str().unwrap();
5785        assert_eq!(val_str, "2t");
5786
5787        let key2 = CString::new("level").unwrap();
5788        let val_ptr2 = super::tgm_metadata_get_string(meta, key2.as_ptr());
5789        assert!(!val_ptr2.is_null());
5790        let val_str2 = unsafe { CStr::from_ptr(val_ptr2) }.to_str().unwrap();
5791        assert_eq!(val_str2, "surface");
5792
5793        super::tgm_metadata_free(meta);
5794    }
5795
5796    // ── tgm_compute_hash deterministic ──
5797
5798    #[test]
5799    fn ffi_compute_hash_deterministic() {
5800        let data = b"deterministic hash test";
5801        let mut out1 = super::TgmBytes {
5802            data: ptr::null_mut(),
5803            len: 0,
5804        };
5805        let mut out2 = super::TgmBytes {
5806            data: ptr::null_mut(),
5807            len: 0,
5808        };
5809
5810        let err = super::tgm_compute_hash(data.as_ptr(), data.len(), ptr::null(), &mut out1);
5811        assert!(matches!(err, super::TgmError::Ok));
5812
5813        let err = super::tgm_compute_hash(data.as_ptr(), data.len(), ptr::null(), &mut out2);
5814        assert!(matches!(err, super::TgmError::Ok));
5815
5816        let hex1 = unsafe { slice::from_raw_parts(out1.data, out1.len) };
5817        let hex2 = unsafe { slice::from_raw_parts(out2.data, out2.len) };
5818        assert_eq!(hex1, hex2);
5819
5820        super::tgm_bytes_free(out1);
5821        super::tgm_bytes_free(out2);
5822    }
5823
5824    // ── tgm_streaming_encoder_write_pre_encoded round-trip ──
5825
5826    #[test]
5827    fn ffi_streaming_encoder_write_pre_encoded_round_trip() {
5828        let dir = std::env::temp_dir();
5829        let path = dir.join("ffi_streaming_pre_encoded.tgm");
5830        let _ = std::fs::remove_file(&path);
5831
5832        let c_path = CString::new(path.to_str().unwrap()).unwrap();
5833        let meta_json = CString::new(r#"{"version":3}"#).unwrap();
5834
5835        let mut enc: *mut super::TgmStreamingEncoder = ptr::null_mut();
5836        let err = super::tgm_streaming_encoder_create(
5837            c_path.as_ptr(),
5838            meta_json.as_ptr(),
5839            ptr::null(),
5840            0,
5841            &mut enc,
5842        );
5843        assert!(matches!(err, super::TgmError::Ok));
5844
5845        let values = [7.0f32, 8.0];
5846        let data: Vec<u8> = values.iter().flat_map(|v| v.to_ne_bytes()).collect();
5847        let desc_json = CString::new(format!(
5848            r#"{{"type":"ntensor","ndim":1,"shape":[{len}],"strides":[1],"dtype":"float32","byte_order":"{bo}","encoding":"none","filter":"none","compression":"none"}}"#,
5849            len = values.len(),
5850            bo = if cfg!(target_endian = "little") { "little" } else { "big" },
5851        )).unwrap();
5852
5853        let err = super::tgm_streaming_encoder_write_pre_encoded(
5854            enc,
5855            desc_json.as_ptr(),
5856            data.as_ptr(),
5857            data.len(),
5858        );
5859        assert!(matches!(err, super::TgmError::Ok));
5860
5861        let err = super::tgm_streaming_encoder_finish(enc);
5862        assert!(matches!(err, super::TgmError::Ok));
5863        super::tgm_streaming_encoder_free(enc);
5864
5865        // Read back and verify
5866        let mut file: *mut super::TgmFile = ptr::null_mut();
5867        let err = super::tgm_file_open(c_path.as_ptr(), &mut file);
5868        assert!(matches!(err, super::TgmError::Ok));
5869
5870        let mut msg: *mut super::TgmMessage = ptr::null_mut();
5871        let err = super::tgm_file_decode_message(file, 0, 0, 0, 0, &mut msg);
5872        assert!(matches!(err, super::TgmError::Ok));
5873
5874        let mut dl: usize = 0;
5875        let dp = super::tgm_object_data(msg, 0, &mut dl);
5876        let decoded: Vec<f32> = unsafe { slice::from_raw_parts(dp, dl) }
5877            .chunks_exact(4)
5878            .map(|c| f32::from_ne_bytes(c.try_into().unwrap()))
5879            .collect();
5880        assert_eq!(decoded, values);
5881
5882        super::tgm_message_free(msg);
5883        super::tgm_file_close(file);
5884        let _ = std::fs::remove_file(&path);
5885    }
5886
5887    // ── tgm_encode with invalid hash algo ──
5888
5889    #[test]
5890    fn ffi_encode_invalid_hash_algo() {
5891        let json = CString::new(r#"{"version":3,"descriptors":[]}"#).unwrap();
5892        let bad_algo = CString::new("bogus").unwrap();
5893        let mut out = super::TgmBytes {
5894            data: ptr::null_mut(),
5895            len: 0,
5896        };
5897
5898        let err = super::tgm_encode(
5899            json.as_ptr(),
5900            ptr::null(),
5901            ptr::null(),
5902            0,
5903            bad_algo.as_ptr(),
5904            0,
5905            &mut out,
5906        );
5907        assert!(matches!(err, super::TgmError::InvalidArg));
5908    }
5909
5910    // ── tgm_scan_entry OOB returns sentinel and sets error ──
5911
5912    #[test]
5913    fn ffi_scan_entry_oob_returns_sentinel() {
5914        let encoded = ffi_encode_single_f32_tensor(&[1.0f32], "");
5915
5916        let mut result: *mut super::TgmScanResult = ptr::null_mut();
5917        let err = super::tgm_scan(encoded.as_ptr(), encoded.len(), &mut result);
5918        assert!(matches!(err, super::TgmError::Ok));
5919
5920        assert_eq!(super::tgm_scan_count(result), 1);
5921
5922        // Valid index works
5923        let good = super::tgm_scan_entry(result, 0);
5924        assert_eq!(good.offset, 0);
5925        assert!(good.length > 0);
5926
5927        // OOB index returns sentinel
5928        let bad = super::tgm_scan_entry(result, 1);
5929        assert_eq!(bad.offset, usize::MAX);
5930        assert_eq!(bad.length, 0);
5931
5932        // Error message is set
5933        let err_ptr = super::tgm_last_error();
5934        assert!(!err_ptr.is_null());
5935        let err_str = unsafe { CStr::from_ptr(err_ptr) }.to_str().unwrap();
5936        assert!(
5937            err_str.contains("out of range"),
5938            "expected OOB error, got: {err_str}"
5939        );
5940
5941        super::tgm_scan_free(result);
5942    }
5943
5944    // ── collect_data_slices null ptr + len=0 safety ──
5945
5946    #[test]
5947    fn ffi_encode_zero_length_null_data_accepted() {
5948        // Encode with a zero-element tensor where the data pointer could be null
5949        // but length is 0 — should succeed without UB.
5950        let json = CString::new(
5951            r#"{"version":3,"descriptors":[{"type":"ntensor","ndim":1,"shape":[0],"strides":[1],"dtype":"float32","byte_order":"little","encoding":"none","filter":"none","compression":"none"}]}"#,
5952        )
5953        .unwrap();
5954        let data_ptrs: [*const u8; 1] = [ptr::null()];
5955        let data_lens: [usize; 1] = [0];
5956        let mut out = super::TgmBytes {
5957            data: ptr::null_mut(),
5958            len: 0,
5959        };
5960
5961        let err = super::tgm_encode(
5962            json.as_ptr(),
5963            data_ptrs.as_ptr(),
5964            data_lens.as_ptr(),
5965            1,
5966            ptr::null(), // no hash
5967            0,           // threads
5968            &mut out,
5969        );
5970        assert!(
5971            matches!(err, super::TgmError::Ok),
5972            "encoding zero-length data with null pointer should succeed"
5973        );
5974        if !out.data.is_null() {
5975            super::tgm_bytes_free(out);
5976        }
5977    }
5978
5979    // ═══ Coverage-closer tests ═════════════════════════════════════════
5980
5981    // ── tgm_validate (previously zero tests) ───────────────────────────
5982
5983    #[test]
5984    fn ffi_validate_null_out() {
5985        let err = super::tgm_validate(ptr::null(), 0, ptr::null(), 0, ptr::null_mut());
5986        assert!(matches!(err, super::TgmError::InvalidArg));
5987    }
5988
5989    #[test]
5990    fn ffi_validate_null_buf_with_nonzero_len() {
5991        let mut out = super::TgmBytes {
5992            data: ptr::null_mut(),
5993            len: 0,
5994        };
5995        let err = super::tgm_validate(ptr::null(), 42, ptr::null(), 0, &mut out);
5996        assert!(matches!(err, super::TgmError::InvalidArg));
5997    }
5998
5999    #[test]
6000    fn ffi_validate_empty_buffer_ok() {
6001        // buf=null, len=0 → valid empty-buffer validation
6002        let mut out = super::TgmBytes {
6003            data: ptr::null_mut(),
6004            len: 0,
6005        };
6006        let err = super::tgm_validate(ptr::null(), 0, ptr::null(), 0, &mut out);
6007        assert!(matches!(err, super::TgmError::Ok));
6008        assert!(!out.data.is_null());
6009        assert!(out.len > 0);
6010        super::tgm_bytes_free(out);
6011    }
6012
6013    #[test]
6014    fn ffi_validate_valid_message_all_levels() {
6015        let encoded = ffi_encode_single_f32_tensor(&[1.0f32, 2.0, 3.0, 4.0], "");
6016        for level_str in &["quick", "checksum", "default", "full"] {
6017            let level = CString::new(*level_str).unwrap();
6018            let mut out = super::TgmBytes {
6019                data: ptr::null_mut(),
6020                len: 0,
6021            };
6022            let err =
6023                super::tgm_validate(encoded.as_ptr(), encoded.len(), level.as_ptr(), 0, &mut out);
6024            assert!(matches!(err, super::TgmError::Ok), "level {level_str}");
6025            super::tgm_bytes_free(out);
6026        }
6027    }
6028
6029    #[test]
6030    fn ffi_validate_canonical_flag() {
6031        let encoded = ffi_encode_single_f32_tensor(&[1.0f32], "");
6032        let mut out = super::TgmBytes {
6033            data: ptr::null_mut(),
6034            len: 0,
6035        };
6036        let err = super::tgm_validate(
6037            encoded.as_ptr(),
6038            encoded.len(),
6039            ptr::null(),
6040            1, // check_canonical
6041            &mut out,
6042        );
6043        assert!(matches!(err, super::TgmError::Ok));
6044        super::tgm_bytes_free(out);
6045    }
6046
6047    #[test]
6048    fn ffi_validate_invalid_level_string() {
6049        let encoded = ffi_encode_single_f32_tensor(&[1.0f32], "");
6050        let bogus = CString::new("bogus-level-name").unwrap();
6051        let mut out = super::TgmBytes {
6052            data: ptr::null_mut(),
6053            len: 0,
6054        };
6055        let err = super::tgm_validate(encoded.as_ptr(), encoded.len(), bogus.as_ptr(), 0, &mut out);
6056        assert!(matches!(err, super::TgmError::InvalidArg));
6057    }
6058
6059    #[test]
6060    fn ffi_validate_garbage_reports_issues() {
6061        let garbage = [0xDEu8; 100];
6062        let mut out = super::TgmBytes {
6063            data: ptr::null_mut(),
6064            len: 0,
6065        };
6066        let err = super::tgm_validate(garbage.as_ptr(), garbage.len(), ptr::null(), 0, &mut out);
6067        assert!(matches!(err, super::TgmError::Ok));
6068        let json = unsafe { slice::from_raw_parts(out.data, out.len) };
6069        let s = std::str::from_utf8(json).unwrap();
6070        assert!(s.contains("issues"));
6071        super::tgm_bytes_free(out);
6072    }
6073
6074    // ── tgm_validate_file (previously zero tests) ──────────────────────
6075
6076    #[test]
6077    fn ffi_validate_file_null_args() {
6078        let mut out = super::TgmBytes {
6079            data: ptr::null_mut(),
6080            len: 0,
6081        };
6082        let err = super::tgm_validate_file(ptr::null(), ptr::null(), 0, &mut out);
6083        assert!(matches!(err, super::TgmError::InvalidArg));
6084        let path = CString::new("/tmp/x.tgm").unwrap();
6085        let err = super::tgm_validate_file(path.as_ptr(), ptr::null(), 0, ptr::null_mut());
6086        assert!(matches!(err, super::TgmError::InvalidArg));
6087    }
6088
6089    #[test]
6090    fn ffi_validate_file_nonexistent() {
6091        let path = CString::new("/nonexistent/path/to/missing-file.tgm").unwrap();
6092        let mut out = super::TgmBytes {
6093            data: ptr::null_mut(),
6094            len: 0,
6095        };
6096        let err = super::tgm_validate_file(path.as_ptr(), ptr::null(), 0, &mut out);
6097        assert!(matches!(err, super::TgmError::Io));
6098    }
6099
6100    #[test]
6101    fn ffi_validate_file_valid_round_trip() {
6102        use std::io::Write;
6103        let encoded = ffi_encode_single_f32_tensor(&[1.0f32, 2.0, 3.0], "");
6104        let tmp = std::env::temp_dir().join(format!(
6105            "tensogram-ffi-validate-file-{}.tgm",
6106            std::process::id(),
6107        ));
6108        std::fs::File::create(&tmp)
6109            .unwrap()
6110            .write_all(&encoded)
6111            .unwrap();
6112        let path = CString::new(tmp.to_str().unwrap()).unwrap();
6113        let mut out = super::TgmBytes {
6114            data: ptr::null_mut(),
6115            len: 0,
6116        };
6117        let err = super::tgm_validate_file(path.as_ptr(), ptr::null(), 0, &mut out);
6118        assert!(matches!(err, super::TgmError::Ok));
6119        super::tgm_bytes_free(out);
6120        let _ = std::fs::remove_file(&tmp);
6121    }
6122
6123    #[test]
6124    fn ffi_validate_file_invalid_level() {
6125        let path = CString::new("/tmp/dummy.tgm").unwrap();
6126        let level = CString::new("bogus").unwrap();
6127        let mut out = super::TgmBytes {
6128            data: ptr::null_mut(),
6129            len: 0,
6130        };
6131        let err = super::tgm_validate_file(path.as_ptr(), level.as_ptr(), 0, &mut out);
6132        assert!(matches!(err, super::TgmError::InvalidArg));
6133    }
6134
6135    // ── Inline-hash integrity via tgm_validate on tampered payload ──
6136
6137    /// Flipping a byte inside the payload region of a hashed
6138    /// message must surface a hash mismatch through
6139    /// `tgm_validate` at the `integrity` / `checksum` level (the
6140    /// v3 integrity channel — see `plans/WIRE_FORMAT.md` §11).
6141    ///
6142    /// `tgm_decode` in v3 does **not** verify frame-level hashes
6143    /// (decode is a pure deserialisation path); the `verify_hash`
6144    /// flag on decode is retained for source compatibility but is
6145    /// a no-op.  Hash integrity is always a validate-level check.
6146    #[test]
6147    fn ffi_validate_detects_tampered_payload() {
6148        let values = vec![1.0f32; 256];
6149        let encoded = ffi_encode_with_hash(&values);
6150        let mut tampered = encoded.clone();
6151        // Tamper ~75% into the message so we're in the payload
6152        // region, not the frame header / CBOR descriptor.
6153        let pos = (tampered.len() * 75) / 100;
6154        tampered[pos] ^= 0xFF;
6155        tampered[pos + 1] ^= 0xFF;
6156
6157        let mut out = super::TgmBytes {
6158            data: ptr::null_mut(),
6159            len: 0,
6160        };
6161        let level = CString::new("checksum").unwrap();
6162        let err = super::tgm_validate(
6163            tampered.as_ptr(),
6164            tampered.len(),
6165            level.as_ptr(),
6166            /* pretty */ 0,
6167            &mut out,
6168        );
6169        // Validation itself runs fine; the report JSON flags the
6170        // mismatch.  `tgm_validate` returns Ok on any parseable
6171        // message and communicates findings via the JSON report.
6172        assert!(matches!(err, super::TgmError::Ok));
6173        assert!(!out.data.is_null());
6174        let json_bytes = unsafe { slice::from_raw_parts(out.data, out.len) };
6175        let json = std::str::from_utf8(json_bytes).unwrap();
6176        assert!(
6177            json.contains("HashMismatch") || json.contains("hash mismatch"),
6178            "expected HashMismatch in validate report, got: {json}"
6179        );
6180        super::tgm_bytes_free(out);
6181    }
6182
6183    // ── tgm_object_data with null out_len pointer ─────────────────────
6184
6185    #[test]
6186    fn ffi_object_data_null_out_len_no_crash() {
6187        let encoded = ffi_encode_single_f32_tensor(&[1.0f32], "");
6188        let mut msg: *mut super::TgmMessage = ptr::null_mut();
6189        let err = super::tgm_decode(
6190            encoded.as_ptr(),
6191            encoded.len(),
6192            /* native_byte_order */ 0,
6193            /* threads */ 0,
6194            0, // verify_hash
6195            &mut msg,
6196        );
6197        assert!(matches!(err, super::TgmError::Ok));
6198        // Calling with null out_len must not crash
6199        let data = super::tgm_object_data(msg, 0, ptr::null_mut());
6200        assert!(!data.is_null());
6201        super::tgm_message_free(msg);
6202    }
6203
6204    // ── tgm_decode_range on a compression that lacks a block index ────
6205
6206    #[test]
6207    fn ffi_decode_range_on_compressed_without_offsets() {
6208        // Encode with zstd compression which doesn't support range decode
6209        // without block offsets.
6210        let json = CString::new(
6211            r#"{"version":3,"descriptors":[{"type":"ntensor","ndim":1,"shape":[100],"strides":[1],"dtype":"float32","byte_order":"little","encoding":"none","filter":"none","compression":"zstd"}]}"#,
6212        )
6213        .unwrap();
6214        let data: Vec<u8> = vec![0u8; 400];
6215        let data_ptr: *const u8 = data.as_ptr();
6216        let data_len = data.len();
6217        let mut out = super::TgmBytes {
6218            data: ptr::null_mut(),
6219            len: 0,
6220        };
6221        let err = super::tgm_encode(
6222            json.as_ptr(),
6223            &data_ptr as *const *const u8,
6224            &data_len as *const usize,
6225            1,
6226            ptr::null(),
6227            /* threads */ 0,
6228            &mut out,
6229        );
6230        assert!(matches!(err, super::TgmError::Ok));
6231        let encoded = unsafe { slice::from_raw_parts(out.data, out.len) }.to_vec();
6232        super::tgm_bytes_free(out);
6233
6234        // Attempt to range-decode: should fail because zstd has no block index.
6235        let range_offset: u64 = 10;
6236        let range_count: u64 = 20;
6237        let mut out_buf = super::TgmBytes {
6238            data: ptr::null_mut(),
6239            len: 0,
6240        };
6241        let mut out_count: usize = 0;
6242        let err = super::tgm_decode_range(
6243            encoded.as_ptr(),
6244            encoded.len(),
6245            0,
6246            &range_offset as *const u64,
6247            &range_count as *const u64,
6248            1,
6249            /* native_byte_order */ 0,
6250            /* threads */ 0,
6251            /* join */ 1,
6252            &mut out_buf,
6253            &mut out_count,
6254        );
6255        assert!(!matches!(err, super::TgmError::Ok));
6256    }
6257
6258    // ── tgm_simple_packing_compute_params edge cases ──
6259
6260    #[test]
6261    fn ffi_simple_packing_null_values() {
6262        let mut ref_val: f64 = 0.0;
6263        let mut bsf: i32 = 0;
6264        let err =
6265            super::tgm_simple_packing_compute_params(ptr::null(), 0, 16, 0, &mut ref_val, &mut bsf);
6266        assert!(matches!(err, super::TgmError::InvalidArg));
6267    }
6268
6269    #[test]
6270    fn ffi_simple_packing_null_out_ref() {
6271        let values: [f64; 3] = [1.0, 2.0, 3.0];
6272        let mut bsf: i32 = 0;
6273        let err = super::tgm_simple_packing_compute_params(
6274            values.as_ptr(),
6275            3,
6276            16,
6277            0,
6278            ptr::null_mut(),
6279            &mut bsf,
6280        );
6281        assert!(matches!(err, super::TgmError::InvalidArg));
6282    }
6283
6284    #[test]
6285    fn ffi_simple_packing_null_out_bsf() {
6286        let values: [f64; 3] = [1.0, 2.0, 3.0];
6287        let mut ref_val: f64 = 0.0;
6288        let err = super::tgm_simple_packing_compute_params(
6289            values.as_ptr(),
6290            3,
6291            16,
6292            0,
6293            &mut ref_val,
6294            ptr::null_mut(),
6295        );
6296        assert!(matches!(err, super::TgmError::InvalidArg));
6297    }
6298
6299    // ── _with_options FFI coverage ─────────────────────────────────────
6300
6301    /// `tgm_encode_with_options(NULL mask_options)` behaves identically
6302    /// to `tgm_encode` — exercises the mask-options NULL-pointer path.
6303    #[test]
6304    fn ffi_encode_with_options_null_mask_ptr() {
6305        let values = [1.0f32, 2.0, 3.0, 4.0];
6306        let data: Vec<u8> = values.iter().flat_map(|v| v.to_ne_bytes()).collect();
6307        let json = format!(
6308            r#"{{"version":3,"descriptors":[{{"type":"ntensor","ndim":1,"shape":[{}],"strides":[1],"dtype":"float32","byte_order":"{}","encoding":"none","filter":"none","compression":"none"}}]}}"#,
6309            values.len(),
6310            if cfg!(target_endian = "little") {
6311                "little"
6312            } else {
6313                "big"
6314            },
6315        );
6316        let c_json = CString::new(json).unwrap();
6317        let hash_algo = CString::new("xxh3").unwrap();
6318        let data_ptr: *const u8 = data.as_ptr();
6319        let data_len: usize = data.len();
6320
6321        let mut out = super::TgmBytes {
6322            data: ptr::null_mut(),
6323            len: 0,
6324        };
6325        let err = super::tgm_encode_with_options(
6326            c_json.as_ptr(),
6327            &data_ptr as *const *const u8,
6328            &data_len as *const usize,
6329            1,
6330            hash_algo.as_ptr(),
6331            0,           // threads
6332            ptr::null(), // mask_options = NULL → defaults
6333            &mut out,
6334        );
6335        assert!(matches!(err, super::TgmError::Ok));
6336        assert!(!out.data.is_null() && out.len > 0);
6337        super::tgm_bytes_free(out);
6338    }
6339
6340    /// `tgm_decode_with_options(NULL mask_options)` behaves identically
6341    /// to `tgm_decode` — exercises the NULL-pointer branch of
6342    /// `apply_decode_mask_options`.
6343    #[test]
6344    fn ffi_decode_with_options_null_mask_ptr() {
6345        let values = [1.0f32, 2.0];
6346        let encoded = ffi_encode_single_f32_tensor(&values, "");
6347
6348        let mut msg: *mut super::TgmMessage = ptr::null_mut();
6349        let err = super::tgm_decode_with_options(
6350            encoded.as_ptr(),
6351            encoded.len(),
6352            0,
6353            0,
6354            0,           // verify_hash
6355            ptr::null(), // mask_options = NULL → default restore_non_finite=true
6356            &mut msg,
6357        );
6358        assert!(matches!(err, super::TgmError::Ok));
6359        assert!(!msg.is_null());
6360        super::tgm_message_free(msg);
6361    }
6362
6363    /// Non-NULL `TgmDecodeMaskOptions` with `restore_non_finite = false`
6364    /// threads through to `DecodeOptions` — pins the
6365    /// `apply_decode_mask_options` mutation path.
6366    #[test]
6367    fn ffi_decode_with_options_explicit_restore_false() {
6368        let values = [1.0f32, 2.0];
6369        let encoded = ffi_encode_single_f32_tensor(&values, "");
6370
6371        let mask_opts = super::TgmDecodeMaskOptions {
6372            restore_non_finite: false,
6373        };
6374        let mut msg: *mut super::TgmMessage = ptr::null_mut();
6375        let err = super::tgm_decode_with_options(
6376            encoded.as_ptr(),
6377            encoded.len(),
6378            0,
6379            0,
6380            0, // verify_hash
6381            &mask_opts,
6382            &mut msg,
6383        );
6384        assert!(matches!(err, super::TgmError::Ok));
6385        super::tgm_message_free(msg);
6386    }
6387
6388    /// `tgm_decode_with_options` with NULL output pointer must
6389    /// return InvalidArg and set `tgm_last_error`.
6390    #[test]
6391    fn ffi_decode_with_options_null_out() {
6392        let err =
6393            super::tgm_decode_with_options(b"x".as_ptr(), 1, 0, 0, 0, ptr::null(), ptr::null_mut());
6394        assert!(matches!(err, super::TgmError::InvalidArg));
6395        let msg = unsafe { CStr::from_ptr(super::tgm_last_error()) }
6396            .to_str()
6397            .unwrap();
6398        assert!(msg.contains("null"), "expected null-arg msg, got: {msg}");
6399    }
6400
6401    // ── tgm_doctor_to_json ──
6402
6403    #[test]
6404    fn ffi_doctor_to_json_returns_parseable_json() {
6405        let mut out = super::TgmBytes {
6406            data: ptr::null_mut(),
6407            len: 0,
6408        };
6409        let err = super::tgm_doctor_to_json(&mut out);
6410        assert!(matches!(err, super::TgmError::Ok));
6411        assert!(!out.data.is_null());
6412        assert!(out.len > 0);
6413
6414        let json_bytes = unsafe { slice::from_raw_parts(out.data, out.len) };
6415        let json_str = std::str::from_utf8(json_bytes).expect("doctor JSON is UTF-8");
6416        let parsed: serde_json::Value = serde_json::from_str(json_str).expect("doctor JSON parses");
6417
6418        // Schema parity with Python / WASM / Rust core: same three top-level keys.
6419        let obj = parsed.as_object().expect("top-level object");
6420        for key in ["build", "features", "self_test"] {
6421            assert!(obj.contains_key(key), "missing key '{key}' in: {obj:?}");
6422        }
6423
6424        super::tgm_bytes_free(out);
6425    }
6426
6427    #[test]
6428    fn ffi_doctor_to_json_null_out() {
6429        let err = super::tgm_doctor_to_json(ptr::null_mut());
6430        assert!(matches!(err, super::TgmError::InvalidArg));
6431        let msg = unsafe { CStr::from_ptr(super::tgm_last_error()) }
6432            .to_str()
6433            .unwrap();
6434        assert!(msg.contains("null"), "expected null-arg msg, got: {msg}");
6435    }
6436}
6437
6438/// Compute a hash of the given data.
6439/// Returns `TGM_ERROR_OK` on success, fills `out` with a `tgm_bytes_t`
6440/// containing the hex-encoded hash string (NOT null-terminated).
6441/// Free with `tgm_bytes_free`.
6442#[unsafe(no_mangle)]
6443pub extern "C" fn tgm_compute_hash(
6444    data: *const u8,
6445    data_len: usize,
6446    algo: *const c_char,
6447    out: *mut TgmBytes,
6448) -> TgmError {
6449    if data.is_null() || out.is_null() {
6450        set_last_error("null argument");
6451        return TgmError::InvalidArg;
6452    }
6453
6454    // v3 has exactly one algorithm; the FFI accepts NULL, "xxh3" or
6455    // "none" through `parse_hash_algo`.  When the caller passes
6456    // "none" we still compute and return the xxh3 digest because
6457    // `tgm_compute_hash` is the standalone "compute a digest" entry
6458    // point — there is no "no hash" output for it; the only thing
6459    // strict-input gets us is rejecting bogus algorithm names like
6460    // "sha256" with a clear error.
6461    if !algo.is_null() {
6462        let s = match unsafe { CStr::from_ptr(algo) }.to_str() {
6463            Ok(s) => s,
6464            Err(_) => {
6465                set_last_error("invalid UTF-8 in algo");
6466                return TgmError::InvalidArg;
6467            }
6468        };
6469        if let Err(e) = parse_hash_name(Some(s)) {
6470            set_last_error(&e.to_string());
6471            return TgmError::InvalidArg;
6472        }
6473    }
6474
6475    let input = unsafe { slice::from_raw_parts(data, data_len) };
6476    let hex = tensogram::hash::compute_hash(input);
6477    // Rebuild via boxed slice to guarantee capacity == len for tgm_bytes_free.
6478    let mut bytes = hex.into_bytes().into_boxed_slice().into_vec();
6479    let result = TgmBytes {
6480        data: bytes.as_mut_ptr(),
6481        len: bytes.len(),
6482    };
6483    std::mem::forget(bytes);
6484    unsafe {
6485        *out = result;
6486    }
6487    TgmError::Ok
6488}
6489
6490// ---------------------------------------------------------------------------
6491// Doctor: environment diagnostics
6492// ---------------------------------------------------------------------------
6493
6494/// Run environment diagnostics and serialise the report as a JSON byte buffer.
6495///
6496/// The report mirrors `tensogram::doctor::run_diagnostics()` and the
6497/// `tensogram doctor` CLI subcommand.  Cross-language parity: the same
6498/// JSON shape is produced by the Python `tensogram.doctor()` and the
6499/// WASM `doctor()` exports — see `docs/src/cli/doctor.md` for the
6500/// schema.  The C FFI build does **not** run the GRIB or NetCDF
6501/// converter self-tests (those features are CLI-only), so the
6502/// `self_test` array covers only the core encode/decode pipeline plus
6503/// the codecs compiled into the dylib.
6504///
6505/// On success returns `TgmError::Ok` and fills `out` with a JSON
6506/// payload (UTF-8, NOT null-terminated).  Use `tgm_bytes_free` to
6507/// release it.  Callers can safely treat `out.data` as a `char*` of
6508/// length `out.len` and pass it to `json_loads` / `nlohmann::json::parse`
6509/// / equivalent.
6510///
6511/// On serialisation failure returns `TgmError::Encoding` and writes a
6512/// human-readable description retrievable via `tgm_last_error()`.
6513///
6514/// # Example
6515///
6516/// ```c
6517/// tgm_bytes_t report = {0};
6518/// if (tgm_doctor_to_json(&report) == TGM_ERROR_OK) {
6519///     fwrite(report.data, 1, report.len, stdout);
6520///     tgm_bytes_free(report);
6521/// }
6522/// ```
6523#[unsafe(no_mangle)]
6524pub extern "C" fn tgm_doctor_to_json(out: *mut TgmBytes) -> TgmError {
6525    if out.is_null() {
6526        set_last_error("null out pointer");
6527        return TgmError::InvalidArg;
6528    }
6529
6530    let report = tensogram::doctor::run_diagnostics();
6531    let json = match serde_json::to_string(&report) {
6532        Ok(s) => s,
6533        Err(e) => {
6534            set_last_error(&format!("failed to serialise doctor report: {e}"));
6535            return TgmError::Encoding;
6536        }
6537    };
6538
6539    // Rebuild via boxed slice to guarantee capacity == len for tgm_bytes_free.
6540    let mut bytes = json.into_bytes().into_boxed_slice().into_vec();
6541    let result = TgmBytes {
6542        data: bytes.as_mut_ptr(),
6543        len: bytes.len(),
6544    };
6545    std::mem::forget(bytes); // ownership transferred to C
6546    unsafe {
6547        *out = result;
6548    }
6549    TgmError::Ok
6550}
6551
6552// ---------------------------------------------------------------------------
6553// Streaming encoder
6554// ---------------------------------------------------------------------------
6555
/// Opaque handle for a streaming encoder that writes data objects progressively.
///
/// Wraps a `StreamingEncoder` whose sink is a buffered file writer.
pub struct TgmStreamingEncoder {
    // NOTE(review): presumably `None` once the encoder has been consumed by
    // `tgm_streaming_encoder_finish` — confirm against that function.
    inner: Option<StreamingEncoder<std::io::BufWriter<std::fs::File>>>,
}
6560
/// JSON used for streaming encoder creation — optional extra/base keys.
///
/// A legacy top-level `"version"` field is tolerated for pre-0.17
/// schema compatibility and routed into `_extra_` on encode, matching
/// the free-form contract of every other binding.  The wire-format
/// version itself lives in the preamble (see
/// `plans/WIRE_FORMAT.md` §3).
#[derive(serde::Deserialize)]
struct StreamingEncodeJson {
    // Legacy top-level `"version"`; absent → `None`.
    #[serde(default)]
    version: Option<u16>,
    // Top-level `"base"` array, one map per entry; absent → empty list.
    #[serde(default)]
    base: Vec<BTreeMap<String, serde_json::Value>>,
    // Every other top-level key, captured via `flatten`.
    #[serde(flatten)]
    extra: BTreeMap<String, serde_json::Value>,
}
6577
6578/// Parse metadata JSON for the streaming encoder (no "descriptors" key).
6579fn parse_streaming_metadata_json(json_str: &str) -> Result<GlobalMetadata, String> {
6580    let parsed: StreamingEncodeJson = serde_json::from_str(json_str)
6581        .map_err(|e| format!("failed to parse metadata JSON: {e}"))?;
6582
6583    let cbor_base: Vec<BTreeMap<String, ciborium::Value>> = parsed
6584        .base
6585        .into_iter()
6586        .map(|entry| {
6587            entry
6588                .into_iter()
6589                .map(|(k, v)| (k, json_to_cbor(v)))
6590                .collect()
6591        })
6592        .collect();
6593
6594    // Validate: no _reserved_ keys in base entries (library-managed namespace)
6595    for (i, entry) in cbor_base.iter().enumerate() {
6596        if entry.contains_key(RESERVED_KEY) {
6597            return Err(format!(
6598                "base[{i}] must not contain '{RESERVED_KEY}' key — the encoder populates it"
6599            ));
6600        }
6601    }
6602
6603    // Route explicit `_extra_` / legacy `version` under the shared
6604    // free-form merge rule.  Matches the `parse_encode_json` path and
6605    // the Python / TypeScript / Rust-core contract.
6606    let cbor_extra = merge_flattened_extras_with_version(parsed.extra, parsed.version)?;
6607    Ok(GlobalMetadata {
6608        base: cbor_base,
6609        extra: cbor_extra,
6610        ..Default::default()
6611    })
6612}
6613
6614/// Create a streaming encoder writing to a file.
6615///
6616/// `metadata_json` is a free-form JSON object.  The `"descriptors"`
6617/// key is NOT permitted here (objects are supplied one at a time
6618/// via `tgm_streaming_encoder_write`).  A legacy top-level
6619/// `"version"` is tolerated and routed into `_extra_` on encode
6620/// (see `parse_streaming_metadata_json`).
6621///
6622/// `hash_algo`: null-terminated string ("xxh3") or NULL for no hash.
6623///
6624/// On success fills `out` with a `TgmStreamingEncoder` handle.
6625/// Free with `tgm_streaming_encoder_free` or finalize with
6626/// `tgm_streaming_encoder_finish`.
6627#[unsafe(no_mangle)]
6628pub extern "C" fn tgm_streaming_encoder_create(
6629    path: *const c_char,
6630    metadata_json: *const c_char,
6631    hash_algo: *const c_char,
6632    threads: u32,
6633    out: *mut *mut TgmStreamingEncoder,
6634) -> TgmError {
6635    if path.is_null() || metadata_json.is_null() || out.is_null() {
6636        set_last_error("null argument");
6637        return TgmError::InvalidArg;
6638    }
6639
6640    let path_str = match unsafe { CStr::from_ptr(path) }.to_str() {
6641        Ok(s) => s,
6642        Err(e) => {
6643            set_last_error(&format!("invalid UTF-8 in path: {e}"));
6644            return TgmError::InvalidArg;
6645        }
6646    };
6647
6648    let json_str = match unsafe { CStr::from_ptr(metadata_json) }.to_str() {
6649        Ok(s) => s,
6650        Err(e) => {
6651            set_last_error(&format!("invalid UTF-8 in metadata_json: {e}"));
6652            return TgmError::InvalidArg;
6653        }
6654    };
6655
6656    let global_metadata = match parse_streaming_metadata_json(json_str) {
6657        Ok(m) => m,
6658        Err(e) => {
6659            set_last_error(&e);
6660            return TgmError::Metadata;
6661        }
6662    };
6663
6664    let hashing = match parse_hash_algo(hash_algo) {
6665        Ok(b) => b,
6666        Err((code, msg)) => {
6667            set_last_error(&msg);
6668            return code;
6669        }
6670    };
6671
6672    let file = match std::fs::File::create(path_str) {
6673        Ok(f) => f,
6674        Err(e) => {
6675            set_last_error(&e.to_string());
6676            return TgmError::Io;
6677        }
6678    };
6679
6680    let options = EncodeOptions {
6681        hashing,
6682        threads,
6683        ..Default::default()
6684    };
6685    let writer = std::io::BufWriter::new(file);
6686
6687    match StreamingEncoder::new(writer, &global_metadata, &options) {
6688        Ok(enc) => {
6689            let handle = Box::new(TgmStreamingEncoder { inner: Some(enc) });
6690            unsafe {
6691                *out = Box::into_raw(handle);
6692            }
6693            TgmError::Ok
6694        }
6695        Err(e) => {
6696            set_last_error(&e.to_string());
6697            to_error_code(&e)
6698        }
6699    }
6700}
6701
6702/// Write a PrecederMetadata frame for the next data object.
6703///
6704/// `metadata_json` is a JSON object with per-object metadata keys
6705/// (e.g. `{"mars": {"param": "2t"}, "units": "K"}`).  The keys
6706/// become `payload[0]` in a GlobalMetadata CBOR with empty `common`.
6707///
6708/// Must be followed by exactly one `tgm_streaming_encoder_write` call
6709/// before another preceder or `tgm_streaming_encoder_finish`.
6710#[unsafe(no_mangle)]
6711pub extern "C" fn tgm_streaming_encoder_write_preceder(
6712    enc: *mut TgmStreamingEncoder,
6713    metadata_json: *const c_char,
6714) -> TgmError {
6715    if enc.is_null() || metadata_json.is_null() {
6716        set_last_error("null argument");
6717        return TgmError::InvalidArg;
6718    }
6719
6720    let json_str = match unsafe { CStr::from_ptr(metadata_json) }.to_str() {
6721        Ok(s) => s,
6722        Err(e) => {
6723            set_last_error(&format!("invalid UTF-8 in metadata_json: {e}"));
6724            return TgmError::InvalidArg;
6725        }
6726    };
6727
6728    let map: BTreeMap<String, ciborium::Value> =
6729        match serde_json::from_str::<serde_json::Value>(json_str) {
6730            Ok(serde_json::Value::Object(obj)) => {
6731                obj.into_iter().map(|(k, v)| (k, json_to_cbor(v))).collect()
6732            }
6733            Ok(_) => {
6734                set_last_error("metadata_json must be a JSON object");
6735                return TgmError::Metadata;
6736            }
6737            Err(e) => {
6738                set_last_error(&format!("failed to parse metadata JSON: {e}"));
6739                return TgmError::Metadata;
6740            }
6741        };
6742
6743    let encoder = unsafe { &mut *enc };
6744    match encoder.inner.as_mut() {
6745        Some(inner) => match inner.write_preceder(map) {
6746            Ok(()) => TgmError::Ok,
6747            Err(e) => {
6748                set_last_error(&e.to_string());
6749                to_error_code(&e)
6750            }
6751        },
6752        None => {
6753            set_last_error("streaming encoder already finished");
6754            TgmError::InvalidArg
6755        }
6756    }
6757}
6758
6759/// Write a single data object to the streaming encoder.
6760///
6761/// `descriptor_json` is a JSON object with the descriptor fields
6762/// (type, ndim, shape, strides, dtype, byte_order, encoding, filter,
6763/// compression, etc.).
6764#[unsafe(no_mangle)]
6765pub extern "C" fn tgm_streaming_encoder_write(
6766    enc: *mut TgmStreamingEncoder,
6767    descriptor_json: *const c_char,
6768    data: *const u8,
6769    data_len: usize,
6770) -> TgmError {
6771    if enc.is_null() || descriptor_json.is_null() || data.is_null() {
6772        set_last_error("null argument");
6773        return TgmError::InvalidArg;
6774    }
6775
6776    let json_str = match unsafe { CStr::from_ptr(descriptor_json) }.to_str() {
6777        Ok(s) => s,
6778        Err(e) => {
6779            set_last_error(&format!("invalid UTF-8 in descriptor_json: {e}"));
6780            return TgmError::InvalidArg;
6781        }
6782    };
6783
6784    let descriptor: DataObjectDescriptor = match serde_json::from_str(json_str) {
6785        Ok(d) => d,
6786        Err(e) => {
6787            set_last_error(&format!("failed to parse descriptor JSON: {e}"));
6788            return TgmError::Metadata;
6789        }
6790    };
6791
6792    let data_slice = unsafe { slice::from_raw_parts(data, data_len) };
6793    let encoder = unsafe { &mut *enc };
6794
6795    match encoder.inner.as_mut() {
6796        Some(inner) => match inner.write_object(&descriptor, data_slice) {
6797            Ok(()) => TgmError::Ok,
6798            Err(e) => {
6799                set_last_error(&e.to_string());
6800                to_error_code(&e)
6801            }
6802        },
6803        None => {
6804            set_last_error("streaming encoder already finished");
6805            TgmError::InvalidArg
6806        }
6807    }
6808}
6809
6810/// Write a single pre-encoded data object to the streaming encoder.
6811///
6812/// Like `tgm_streaming_encoder_write`, but `data` must already be encoded
6813/// according to the descriptor's pipeline (`encoding` / `filter` /
6814/// `compression`). The library does not run the encoding pipeline — it
6815/// validates the descriptor's pipeline configuration and writes the bytes
6816/// as-is into a data object frame. The hash (if configured on the encoder)
6817/// is recomputed over the caller's bytes.
6818///
6819/// `descriptor_json`: same JSON schema as `tgm_streaming_encoder_write`.
6820///
6821/// For `szip` compression, callers SHOULD include `szip_block_offsets`
6822/// (bit offsets, not byte offsets) in the descriptor's params so that
6823/// `tgm_decode_range` can locate compressed block boundaries later.
6824/// Other pipeline params (e.g. `simple_packing` reference value, scale
6825/// factors) must also be present in the descriptor.
6826///
6827/// Any `hash` field embedded in the descriptor JSON is ignored — the
6828/// library always recomputes the hash from the caller's bytes.
6829#[unsafe(no_mangle)]
6830pub extern "C" fn tgm_streaming_encoder_write_pre_encoded(
6831    enc: *mut TgmStreamingEncoder,
6832    descriptor_json: *const c_char,
6833    data: *const u8,
6834    data_len: usize,
6835) -> TgmError {
6836    if enc.is_null() || descriptor_json.is_null() || data.is_null() {
6837        set_last_error("null argument");
6838        return TgmError::InvalidArg;
6839    }
6840
6841    let json_str = match unsafe { CStr::from_ptr(descriptor_json) }.to_str() {
6842        Ok(s) => s,
6843        Err(e) => {
6844            set_last_error(&format!("invalid UTF-8 in descriptor_json: {e}"));
6845            return TgmError::InvalidArg;
6846        }
6847    };
6848
6849    let descriptor: DataObjectDescriptor = match serde_json::from_str(json_str) {
6850        Ok(d) => d,
6851        Err(e) => {
6852            set_last_error(&format!("failed to parse descriptor JSON: {e}"));
6853            return TgmError::Metadata;
6854        }
6855    };
6856
6857    let data_slice = unsafe { slice::from_raw_parts(data, data_len) };
6858    let encoder = unsafe { &mut *enc };
6859
6860    match encoder.inner.as_mut() {
6861        Some(inner) => match inner.write_object_pre_encoded(&descriptor, data_slice) {
6862            Ok(()) => TgmError::Ok,
6863            Err(e) => {
6864                set_last_error(&e.to_string());
6865                to_error_code(&e)
6866            }
6867        },
6868        None => {
6869            set_last_error("streaming encoder already finished");
6870            TgmError::InvalidArg
6871        }
6872    }
6873}
6874
6875/// Return the number of objects written so far.
6876#[unsafe(no_mangle)]
6877pub extern "C" fn tgm_streaming_encoder_count(enc: *const TgmStreamingEncoder) -> usize {
6878    if enc.is_null() {
6879        return 0;
6880    }
6881    unsafe { (*enc).inner.as_ref().map(|e| e.object_count()).unwrap_or(0) }
6882}
6883
6884/// Finalize the streaming encoder, writing footer and closing the file.
6885///
6886/// After calling this, the handle is still valid but empty — the caller
6887/// must still call `tgm_streaming_encoder_free` to release it.
6888#[unsafe(no_mangle)]
6889pub extern "C" fn tgm_streaming_encoder_finish(enc: *mut TgmStreamingEncoder) -> TgmError {
6890    if enc.is_null() {
6891        set_last_error("null argument");
6892        return TgmError::InvalidArg;
6893    }
6894
6895    let encoder = unsafe { &mut *enc };
6896    match encoder.inner.take() {
6897        Some(inner) => match inner.finish() {
6898            Ok(_writer) => {
6899                // Writer is dropped, file is closed.
6900                // Do NOT free enc — caller must call tgm_streaming_encoder_free.
6901                TgmError::Ok
6902            }
6903            Err(e) => {
6904                set_last_error(&e.to_string());
6905                to_error_code(&e)
6906            }
6907        },
6908        None => {
6909            set_last_error("streaming encoder already finished");
6910            TgmError::InvalidArg
6911        }
6912    }
6913}
6914
6915/// Free a streaming encoder without finalizing (abandons the output).
6916#[unsafe(no_mangle)]
6917pub extern "C" fn tgm_streaming_encoder_free(enc: *mut TgmStreamingEncoder) {
6918    if !enc.is_null() {
6919        unsafe {
6920            drop(Box::from_raw(enc));
6921        }
6922    }
6923}
6924
6925// ---------------------------------------------------------------------------
6926// Validation
6927// ---------------------------------------------------------------------------
6928
6929/// Parse a C-string validation level into `ValidateOptions`.
6930fn parse_validate_options(
6931    level: *const c_char,
6932    check_canonical: i32,
6933) -> Result<ValidateOptions, (TgmError, String)> {
6934    let level_str = if level.is_null() {
6935        "default"
6936    } else {
6937        unsafe { CStr::from_ptr(level) }
6938            .to_str()
6939            .map_err(|_| (TgmError::InvalidArg, "invalid UTF-8 in level".to_string()))?
6940    };
6941
6942    let (max_level, checksum_only) = match level_str {
6943        "quick" => (ValidationLevel::Structure, false),
6944        "default" => (ValidationLevel::Integrity, false),
6945        "checksum" => (ValidationLevel::Integrity, true),
6946        "full" => (ValidationLevel::Fidelity, false),
6947        other => {
6948            return Err((
6949                TgmError::InvalidArg,
6950                format!(
6951                    "unknown validation level: '{}', expected one of: quick, default, checksum, full",
6952                    other
6953                ),
6954            ));
6955        }
6956    };
6957
6958    Ok(ValidateOptions {
6959        max_level,
6960        check_canonical: check_canonical != 0,
6961        checksum_only,
6962    })
6963}
6964
6965/// Validate a single Tensogram message buffer.
6966///
6967/// `buf` / `buf_len`: the wire-format message bytes (single message).
6968///   `buf` may be NULL when `buf_len` is 0 (empty-buffer validation).
6969/// `level`: validation depth — null-terminated C string:
6970///   `"quick"` (structure only), `"default"` (up to hash check),
6971///   `"checksum"` (hash check, suppress structural warnings),
6972///   `"full"` (full decode + NaN/Inf scan). NULL defaults to `"default"`.
6973/// `check_canonical`: non-zero to check RFC 8949 CBOR key ordering.
6974/// `out`: receives UTF-8 JSON bytes describing the validation report.
6975///   Not NUL-terminated — use `out->len` for the byte count.
6976///   Free with `tgm_bytes_free`.
6977///
6978/// Returns `TGM_ERROR_OK` on success (even if the message has issues —
6979/// the issues are in the JSON report). Returns `TGM_ERROR_INVALID_ARG`
6980/// for argument validation failures (null pointers, invalid level string),
6981/// or `TGM_ERROR_ENCODING` if JSON serialization of the report fails.
6982#[unsafe(no_mangle)]
6983pub extern "C" fn tgm_validate(
6984    buf: *const u8,
6985    buf_len: usize,
6986    level: *const c_char,
6987    check_canonical: i32,
6988    out: *mut TgmBytes,
6989) -> TgmError {
6990    if out.is_null() {
6991        set_last_error("null argument");
6992        return TgmError::InvalidArg;
6993    }
6994    // Allow buf=NULL when buf_len=0 (empty-buffer validation).
6995    if buf.is_null() && buf_len > 0 {
6996        set_last_error("null buf with non-zero buf_len");
6997        return TgmError::InvalidArg;
6998    }
6999
7000    let options = match parse_validate_options(level, check_canonical) {
7001        Ok(o) => o,
7002        Err((code, msg)) => {
7003            set_last_error(&msg);
7004            return code;
7005        }
7006    };
7007
7008    let data = if buf.is_null() {
7009        &[]
7010    } else {
7011        unsafe { slice::from_raw_parts(buf, buf_len) }
7012    };
7013    let report = validate_message(data, &options);
7014
7015    match serde_json::to_vec(&report) {
7016        Ok(json_bytes) => {
7017            let mut json_bytes = json_bytes.into_boxed_slice().into_vec();
7018            let result = TgmBytes {
7019                data: json_bytes.as_mut_ptr(),
7020                len: json_bytes.len(),
7021            };
7022            std::mem::forget(json_bytes);
7023            unsafe {
7024                *out = result;
7025            }
7026            TgmError::Ok
7027        }
7028        Err(e) => {
7029            set_last_error(&format!("JSON serialization failed: {e}"));
7030            TgmError::Encoding
7031        }
7032    }
7033}
7034
7035/// Validate all messages in a `.tgm` file.
7036///
7037/// `path`: null-terminated UTF-8 path to the file.
7038/// `level`: validation depth (same as `tgm_validate`). NULL = `"default"`.
7039/// `check_canonical`: non-zero to check CBOR key ordering.
7040/// `out`: receives UTF-8 JSON bytes describing the file validation report.
7041///   Not NUL-terminated — use `out->len` for the byte count.
7042///   Free with `tgm_bytes_free`.
7043///
7044/// Returns `TGM_ERROR_OK` on success (issues are in the JSON).
7045/// Returns `TGM_ERROR_IO` if the file cannot be opened or read.
7046/// Returns `TGM_ERROR_INVALID_ARG` for null pointers or invalid level.
7047/// Returns `TGM_ERROR_ENCODING` if JSON serialization of the report fails.
7048#[unsafe(no_mangle)]
7049pub extern "C" fn tgm_validate_file(
7050    path: *const c_char,
7051    level: *const c_char,
7052    check_canonical: i32,
7053    out: *mut TgmBytes,
7054) -> TgmError {
7055    if path.is_null() || out.is_null() {
7056        set_last_error("null argument");
7057        return TgmError::InvalidArg;
7058    }
7059
7060    let path_str = match unsafe { CStr::from_ptr(path) }.to_str() {
7061        Ok(s) => s,
7062        Err(e) => {
7063            set_last_error(&format!("invalid UTF-8 in path: {e}"));
7064            return TgmError::InvalidArg;
7065        }
7066    };
7067
7068    let options = match parse_validate_options(level, check_canonical) {
7069        Ok(o) => o,
7070        Err((code, msg)) => {
7071            set_last_error(&msg);
7072            return code;
7073        }
7074    };
7075
7076    let report = match core_validate_file(Path::new(path_str), &options) {
7077        Ok(r) => r,
7078        Err(e) => {
7079            set_last_error(&e.to_string());
7080            return TgmError::Io;
7081        }
7082    };
7083
7084    match serde_json::to_vec(&report) {
7085        Ok(json_bytes) => {
7086            let mut json_bytes = json_bytes.into_boxed_slice().into_vec();
7087            let result = TgmBytes {
7088                data: json_bytes.as_mut_ptr(),
7089                len: json_bytes.len(),
7090            };
7091            std::mem::forget(json_bytes);
7092            unsafe {
7093                *out = result;
7094            }
7095            TgmError::Ok
7096        }
7097        Err(e) => {
7098            set_last_error(&format!("JSON serialization failed: {e}"));
7099            TgmError::Encoding
7100        }
7101    }
7102}