tensogram-encodings 0.21.0

Encoding pipeline and compression codec registry for the Tensogram message format
Documentation
// (C) Copyright 2026- ECMWF and individual contributors.
//
// This software is licensed under the terms of the Apache Licence Version 2.0
// which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.

//! Backend version queries for each compiled-in codec.
//!
//! Each function returns a [`BackendVersion`] describing the codec name,
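//! linkage model, and version string.  Functions are feature-gated to match
//! the codec they describe — callers must guard each call site with
//! `#[cfg(feature = "...")]`.  A runtime-style `cfg!(feature = "...")` check
//! is not enough, because a gated function does not exist at all when its
//! feature is disabled and the call would fail to compile.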
//!
//! Version sources:
//! - **FFI codecs**: runtime C function calls (`ZSTD_versionString`, etc.)
//!   so the version reflects the *linked* library, not just the crate pin.
//! - **libaec**: compile-time C shim reading `AEC_VERSION_STR` from the
//!   libaec header built by `libaec-sys`.
//! - **Pure-Rust crates**: `built.rs` dependency table captured at compile
//!   time from `Cargo.lock`.

// Include the compile-time dependency table generated by the `built` crate.
// Gated to the features that actually read [`DEPENDENCIES`] so a
// no-default-features build doesn't trip the `unused_imports`/`dead_code` lints.
//
// The feature list below mirrors the gate on `dep_version`, which is the
// reader of `built_info::DEPENDENCIES` in this file; keep the two lists in
// sync when adding a codec that reports its version from `Cargo.lock`.
#[cfg(any(
    feature = "szip-pure",
    feature = "zstd-pure",
    feature = "lz4",
    feature = "sz3",
    feature = "threads",
))]
mod built_info {
    // Textually splices in `$OUT_DIR/built.rs`; the file must have been
    // written by the build script before this module is compiled.
    include!(concat!(env!("OUT_DIR"), "/built.rs"));
}

/// Linkage model for a codec backend.
///
/// Serialized with serde using kebab-case variant names, i.e. `"ffi"` and
/// `"pure-rust"`.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
#[serde(rename_all = "kebab-case")]
pub enum Linkage {
    /// C FFI — the version reflects the dynamically or statically linked C library.
    Ffi,
    /// Pure Rust — the version is the crate version from `Cargo.lock`.
    PureRust,
}

/// Version information for a single codec backend.
///
/// Values are normally built through [`BackendVersion::ffi`] or
/// [`BackendVersion::pure_rust`], which fill in [`Linkage`] for the caller.
/// The struct is serializable via serde so it can be emitted in reports.
#[derive(Debug, Clone, serde::Serialize)]
pub struct BackendVersion {
    /// Short human-readable name of the underlying library (e.g. `"libzstd"`).
    pub name: &'static str,
    /// How the library is linked.
    pub linkage: Linkage,
    /// Version string, or `None` when the version cannot be determined.
    pub version: Option<String>,
}

impl BackendVersion {
    /// Construct a [`BackendVersion`] for a C FFI backend.
    pub fn ffi(name: &'static str, version: Option<String>) -> Self {
        Self {
            name,
            linkage: Linkage::Ffi,
            version,
        }
    }

    /// Construct a [`BackendVersion`] for a pure-Rust backend.
    pub fn pure_rust(name: &'static str, version: Option<String>) -> Self {
        Self {
            name,
            linkage: Linkage::PureRust,
            version,
        }
    }
}

/// Resolve a crate's version from the `Cargo.lock` snapshot captured at
/// compile time.
///
/// Scans the dependency table in order and yields the version of the first
/// entry whose name equals `crate_name`; yields `None` when no entry matches.
/// Compiled only for the features that call it, so a no-default-features
/// build doesn't trip the `dead_code` lint.
#[cfg(any(
    feature = "szip-pure",
    feature = "zstd-pure",
    feature = "lz4",
    feature = "sz3",
    feature = "threads",
))]
fn dep_version(crate_name: &str) -> Option<String> {
    for (dep_name, dep_ver) in built_info::DEPENDENCIES.iter() {
        if *dep_name == crate_name {
            // First match wins, even if the crate is listed more than once.
            return Some(dep_ver.to_string());
        }
    }
    None
}

/// Copy a NUL-terminated C string into an owned [`String`], mapping blank
/// input to `None`.
///
/// The result is `None` when `ptr` is null, or when the decoded text is
/// empty or consists solely of whitespace: [`BackendVersion::version`] uses
/// `None` to mean "version cannot be determined", so a blank string is never
/// reported as a known version.  Any other text is returned unmodified —
/// leading, trailing and internal whitespace are all kept.  Bytes that are
/// not valid UTF-8 are substituted with U+FFFD by the lossy conversion.
///
/// Public and compiled unconditionally so that other callers (e.g. the
/// CLI's converter-version helpers for grib/netcdf) can reuse the same
/// null/blank handling instead of reimplementing it.
///
/// # Safety
///
/// `ptr` must either be null, or point to a NUL-terminated C string that
/// stays valid and unmodified for the whole duration of this call.  The
/// string is copied before returning, so no borrow of the C storage escapes.
pub unsafe fn cstr_ptr_to_owned(ptr: *const std::ffi::c_char) -> Option<String> {
    if ptr.is_null() {
        return None;
    }
    // SAFETY: `ptr` is non-null here, and the caller guarantees it refers to
    // a valid NUL-terminated C string that outlives this function call.
    let raw = unsafe { std::ffi::CStr::from_ptr(ptr) };
    let decoded = raw.to_string_lossy();
    // Blankness is judged on the trimmed view; the value handed back is the
    // untrimmed text.
    let is_blank = decoded.trim().is_empty();
    (!is_blank).then(|| decoded.into_owned())
}

// ── szip (libaec FFI) ────────────────────────────────────────────────────────

// Foreign declaration for the libaec version shim.  Items in an
// `unsafe extern` block are unsafe to call unless marked `safe`, so the
// wrapper below calls this inside an `unsafe` block.
#[cfg(feature = "szip")]
unsafe extern "C" {
    /// Provided by `build_shim/libaec_version.c`, compiled in `build.rs`.
    /// Returns a pointer to libaec's `AEC_VERSION_STR` macro expansion —
    /// always a non-null static string literal owned by libaec itself.
    ///
    /// Takes no arguments; the returned pointer is consumed by
    /// `szip_ffi_version` via `cstr_ptr_to_owned`, which copies the text.
    fn tensogram_libaec_version() -> *const std::ffi::c_char;
}

/// Report the libaec version backing szip compression (FFI backend).
///
/// The string comes from the crate's compile-time C shim; the `version`
/// field is `None` only if the shim yields a null or blank string.
#[cfg(feature = "szip")]
pub fn szip_ffi_version() -> BackendVersion {
    // SAFETY: `tensogram_libaec_version` is provided by our compile-time
    // C shim, takes no arguments, and only returns a pointer to libaec's
    // static `AEC_VERSION_STR` string literal.
    let raw = unsafe { tensogram_libaec_version() };
    // SAFETY: `raw` points to immutable, NUL-terminated static storage (or
    // is null, which `cstr_ptr_to_owned` accepts), so reading it is sound.
    let version = unsafe { cstr_ptr_to_owned(raw) };
    BackendVersion::ffi("libaec", version)
}

/// Report the version of the pure-Rust szip implementation.
///
/// Both the display name and the `Cargo.lock` lookup key are the crate
/// name `tensogram-szip`.
#[cfg(feature = "szip-pure")]
pub fn szip_pure_version() -> BackendVersion {
    const CRATE: &str = "tensogram-szip";
    let version = dep_version(CRATE);
    BackendVersion::pure_rust(CRATE, version)
}

// ── zstd ────────────────────────────────────────────────────────────────────

// Foreign declaration resolved against the linked libzstd.  Items in an
// `unsafe extern` block are unsafe to call unless marked `safe`.
#[cfg(feature = "zstd")]
unsafe extern "C" {
    /// Returns libzstd's static version string ("1.5.7" etc.).  Documented
    /// in `<zstd.h>` as always returning a non-null literal.
    ///
    /// The wrapper `zstd_ffi_version` still routes the pointer through
    /// `cstr_ptr_to_owned`, which tolerates a null result.
    fn ZSTD_versionString() -> *const std::ffi::c_char;
}

/// Report the version of the linked libzstd library (FFI backend).
///
/// Queried at runtime through `ZSTD_versionString`; the `version` field is
/// `None` only if that call yields a null or blank string.
#[cfg(feature = "zstd")]
pub fn zstd_ffi_version() -> BackendVersion {
    // SAFETY: `ZSTD_versionString()` takes no arguments and is documented to
    // return a pointer to immutable static string storage owned by libzstd.
    let raw = unsafe { ZSTD_versionString() };
    // SAFETY: `raw` refers to that static NUL-terminated storage (or is
    // null, which `cstr_ptr_to_owned` accepts).
    let version = unsafe { cstr_ptr_to_owned(raw) };
    BackendVersion::ffi("libzstd", version)
}

/// Report the version of the pure-Rust zstd implementation.
///
/// Both the display name and the `Cargo.lock` lookup key are the crate
/// name `ruzstd`.
#[cfg(feature = "zstd-pure")]
pub fn zstd_pure_version() -> BackendVersion {
    const CRATE: &str = "ruzstd";
    let version = dep_version(CRATE);
    BackendVersion::pure_rust(CRATE, version)
}

// ── lz4 ─────────────────────────────────────────────────────────────────────

/// Report the version of the pure-Rust `lz4_flex` implementation.
///
/// Both the display name and the `Cargo.lock` lookup key are the crate
/// name `lz4_flex`.
#[cfg(feature = "lz4")]
pub fn lz4_version() -> BackendVersion {
    const CRATE: &str = "lz4_flex";
    let version = dep_version(CRATE);
    BackendVersion::pure_rust(CRATE, version)
}

// ── blosc2 ──────────────────────────────────────────────────────────────────

// Foreign declaration resolved against the linked libblosc2.  Items in an
// `unsafe extern` block are unsafe to call unless marked `safe`.
#[cfg(feature = "blosc2")]
unsafe extern "C" {
    /// Returns libblosc2's static version string.  Documented in
    /// `<blosc2.h>` to return a pointer to immutable string storage.
    ///
    /// Consumed by `blosc2_version` via `cstr_ptr_to_owned`, which copies
    /// the text and tolerates a null result.
    fn blosc2_get_version_string() -> *const std::ffi::c_char;
}

/// Report the version of the linked libblosc2 library (FFI backend).
///
/// Queried at runtime through `blosc2_get_version_string`; the `version`
/// field is `None` only if that call yields a null or blank string.
#[cfg(feature = "blosc2")]
pub fn blosc2_version() -> BackendVersion {
    // SAFETY: `blosc2_get_version_string()` takes no arguments and returns a
    // pointer to static string storage owned by libblosc2.
    let raw = unsafe { blosc2_get_version_string() };
    // SAFETY: `raw` refers to that static NUL-terminated storage (or is
    // null, which `cstr_ptr_to_owned` accepts).
    let version = unsafe { cstr_ptr_to_owned(raw) };
    BackendVersion::ffi("libblosc2", version)
}

// ── zfp ─────────────────────────────────────────────────────────────────────

// Foreign declaration resolved against the linked libzfp.  Unlike the other
// codecs in this file, the version is exposed as a global variable rather
// than a function, so it is declared as an extern `static`.
#[cfg(feature = "zfp")]
unsafe extern "C" {
    /// `const char* zfp_version_string` — a global C variable in libzfp
    /// that holds a pointer to the library's static version string.
    ///
    /// Declared without `mut`, so Rust only ever reads it; the read happens
    /// in `zfp_version` inside an `unsafe` block.
    static zfp_version_string: *const std::ffi::c_char;
}

/// Report the version of the linked libzfp library (FFI backend).
///
/// Read at runtime from libzfp's `zfp_version_string` global; the `version`
/// field is `None` only if that pointer is null or the string is blank.
#[cfg(feature = "zfp")]
pub fn zfp_version() -> BackendVersion {
    // SAFETY: `zfp_version_string` is a const global pointer initialised
    // once when libzfp is loaded and never mutated afterwards, so loading
    // its value is sound.
    let raw = unsafe { zfp_version_string };
    // SAFETY: the pointer targets immutable NUL-terminated string storage
    // owned by libzfp (or is null, which `cstr_ptr_to_owned` accepts).
    let version = unsafe { cstr_ptr_to_owned(raw) };
    BackendVersion::ffi("libzfp", version)
}

// ── sz3 ─────────────────────────────────────────────────────────────────────

/// Report the version used for the SZ3 backend (FFI linkage).
///
/// SZ3 does not expose a runtime version function, so the value is a
/// compile-time proxy: the `tensogram-sz3` crate version recorded in
/// `Cargo.lock`.  The backend is still reported under the name `SZ3`
/// with FFI linkage.
#[cfg(feature = "sz3")]
pub fn sz3_version() -> BackendVersion {
    let pinned = dep_version("tensogram-sz3");
    BackendVersion::ffi("SZ3", pinned)
}

// ── threads (rayon) ─────────────────────────────────────────────────────────

/// Report the version of the `rayon` crate used for multi-threaded
/// pipeline execution.
///
/// Both the display name and the `Cargo.lock` lookup key are the crate
/// name `rayon`.
#[cfg(feature = "threads")]
pub fn rayon_version() -> BackendVersion {
    const CRATE: &str = "rayon";
    let version = dep_version(CRATE);
    BackendVersion::pure_rust(CRATE, version)
}

// ── tests ────────────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    /// Reports whether `text` contains at least one ASCII digit.
    fn has_digit(text: &str) -> bool {
        text.bytes().any(|byte| byte.is_ascii_digit())
    }

    // ── cstr_ptr_to_owned: contract tests with synthetic pointers ───────────

    /// Encodes `s` as a NUL-terminated `c_char` buffer.  A pointer taken
    /// from the returned `Vec` is only valid while the `Vec` is alive.
    fn cstr_buf(s: &str) -> Vec<std::ffi::c_char> {
        s.bytes()
            .chain(std::iter::once(0)) // NUL terminator
            .map(|byte| byte as std::ffi::c_char)
            .collect()
    }

    /// Runs `cstr_ptr_to_owned` over a freshly built buffer holding `text`.
    fn decode(text: &str) -> Option<String> {
        let buf = cstr_buf(text);
        // SAFETY: `buf` outlives the call; pointer is valid and NUL-terminated.
        unsafe { cstr_ptr_to_owned(buf.as_ptr()) }
    }

    #[test]
    fn cstr_ptr_to_owned_returns_none_for_null() {
        let null: *const std::ffi::c_char = std::ptr::null();
        // SAFETY: a null pointer is explicitly permitted by the function's
        // safety contract.
        assert_eq!(unsafe { cstr_ptr_to_owned(null) }, None);
    }

    #[test]
    fn cstr_ptr_to_owned_returns_none_for_empty_string() {
        let result = decode("");
        assert_eq!(
            result, None,
            "empty C string should produce None, got {result:?}"
        );
    }

    #[test]
    fn cstr_ptr_to_owned_returns_none_for_whitespace_only() {
        let result = decode("   \t\n  ");
        assert_eq!(
            result, None,
            "all-whitespace C string should produce None, got {result:?}"
        );
    }

    #[test]
    fn cstr_ptr_to_owned_preserves_normal_string() {
        assert_eq!(decode("1.2.3").as_deref(), Some("1.2.3"));
    }

    #[test]
    fn cstr_ptr_to_owned_keeps_internal_whitespace() {
        // Only a string that is blank after trimming is rejected; spaces in
        // the middle are kept (e.g. libnetcdf's "X.Y.Z of <date>").
        let expected = "4.10.0 of Apr  3 2024";
        assert_eq!(decode(expected).as_deref(), Some(expected));
    }

    // ── per-backend smoke tests: version present, non-empty, has a digit ────

    #[cfg(feature = "szip")]
    #[test]
    fn szip_ffi_version_non_empty() {
        let ver = szip_ffi_version()
            .version
            .expect("libaec version should be present");
        assert!(!ver.is_empty(), "libaec version string is empty");
        assert!(has_digit(&ver), "libaec version has no digit: {ver}");
    }

    #[cfg(feature = "zstd")]
    #[test]
    fn zstd_ffi_version_non_empty() {
        let ver = zstd_ffi_version()
            .version
            .expect("libzstd version should be present");
        assert!(!ver.is_empty());
        assert!(has_digit(&ver), "zstd version has no digit: {ver}");
    }

    #[cfg(feature = "lz4")]
    #[test]
    fn lz4_version_non_empty() {
        let ver = lz4_version()
            .version
            .expect("lz4_flex version should be present");
        assert!(!ver.is_empty());
        assert!(has_digit(&ver), "lz4 version has no digit: {ver}");
    }

    #[cfg(feature = "blosc2")]
    #[test]
    fn blosc2_version_non_empty() {
        let ver = blosc2_version()
            .version
            .expect("libblosc2 version should be present");
        assert!(!ver.is_empty());
        assert!(has_digit(&ver), "blosc2 version has no digit: {ver}");
    }

    #[cfg(feature = "zfp")]
    #[test]
    fn zfp_version_non_empty() {
        let ver = zfp_version()
            .version
            .expect("libzfp version should be present");
        assert!(!ver.is_empty());
        assert!(has_digit(&ver), "zfp version has no digit: {ver}");
    }

    #[cfg(feature = "sz3")]
    #[test]
    fn sz3_version_non_empty() {
        let ver = sz3_version()
            .version
            .expect("sz3 version should be present");
        assert!(!ver.is_empty());
        assert!(has_digit(&ver), "sz3 version has no digit: {ver}");
    }

    #[cfg(feature = "threads")]
    #[test]
    fn rayon_version_non_empty() {
        let ver = rayon_version()
            .version
            .expect("rayon version should be present");
        assert!(!ver.is_empty());
        assert!(has_digit(&ver), "rayon version has no digit: {ver}");
    }
}