vortex-tensor 0.72.0

Vortex tensor extension type
Documentation
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! TurboQuant vector quantization encoding for Vortex.
//!
//! Implements a Stage 1 TurboQuant encoding ([arXiv:2504.19874], [RFC 0033]) for lossy
//! compression of high-dimensional vector data. The encoding operates on [`Vector`] extension
//! arrays, compressing their `FixedSizeList` storage into quantized codes after a structured
//! orthogonal surrogate rotation.
//!
//! [arXiv:2504.19874]: https://arxiv.org/abs/2504.19874
//! [RFC 0033]: https://vortex-data.github.io/rfcs/rfc/0033.html
//! [`Vector`]: crate::vector::Vector
//!
//! # Overview
//!
//! TurboQuant minimizes mean-squared reconstruction error (1-8 bits per coordinate)
//! using MSE-optimal scalar quantization on coordinates of a rotated unit vector.
//!
//! The encoding is decomposed into independently swappable layers:
//!
//! - **Normalization**: [`L2Denorm`] stores per-vector norms and wraps the compressed child.
//! - **Orthogonal transform**: [`SorfTransform`] records the SORF structured orthogonal
//!   transform and applies the inverse at decode time.
//! - **Quantization**: `DictArray(codes, centroids)` wrapped in `FixedSizeListArray` stores
//!   the per-coordinate codebook indices.
//!
//! The full encoded tree is:
//!
//! ```text
//! ScalarFnArray(L2Denorm, [
//!     ScalarFnArray(SorfTransform, [FSL(Dict(codes, centroids))]),
//!     norms
//! ])
//! ```
//!
//! When executed, the tree automatically decompresses: Dict dequantizes codes → SorfTransform
//! inverse-rotates → L2Denorm re-applies norms → original vectors (approximately).
//!
//! [`L2Denorm`]: crate::scalar_fns::l2_denorm::L2Denorm
//! [`SorfTransform`]: crate::scalar_fns::sorf_transform::SorfTransform
//!
//! The TurboQuant paper analyzes a full random orthogonal rotation. The current Vortex
//! implementation instead uses a fixed 3-round Walsh-Hadamard-based structured transform with
//! random sign diagonals generated by Vortex's frozen local SplitMix64 stream. This is a practical
//! approximation chosen for encode/decode efficiency, and should be understood as an
//! implementation choice rather than the exact construction used in the paper's proofs.
//!
//! The current encoding is also intentionally MSE-only. It does not yet implement the paper's QJL
//! residual correction for unbiased inner-product estimation, and it still uses internal
//! power-of-2 padding rather than the block decomposition proposed in RFC 0033.
//!
//! # Theoretical error bounds
//!
//! For unit-norm vectors quantized at `b` bits per coordinate, the paper's Theorem 1
//! guarantees normalized MSE distortion:
//!
//! > `E[||x - x_hat||^2 / ||x||^2] <= (sqrt(3) * pi / 2) / 4^b`
//!
//! | Bits | MSE bound  | Quality           |
//! |------|------------|-------------------|
//! | 1    | 6.80e-01   | Poor              |
//! | 2    | 1.70e-01   | Usable for ANN    |
//! | 3    | 4.25e-02   | Good              |
//! | 4    | 1.06e-02   | Very good         |
//! | 5    | 2.66e-03   | Excellent         |
//! | 6    | 6.64e-04   | Near-lossless     |
//! | 7    | 1.66e-04   | Near-lossless     |
//! | 8    | 4.15e-05   | Near-lossless     |
//!
//! # Compression ratios
//!
//! Each vector is stored as `padded_dim * bit_width / 8` bytes of quantized codes plus one stored
//! norm (in the [`L2Denorm`] wrapper). In the current implementation, that norm uses the vector's
//! element float type, not a separate fixed storage precision. Non-power-of-2 dimensions are
//! padded to the next power of 2 for the structured rotation, which reduces the effective ratio
//! for those dimensions.
//!
//! The table below assumes f32 input, so the stored norm is 4 bytes.
//!
//! | dim  | padded | bits | f32 bytes | TQ bytes | ratio  |
//! |------|--------|------|-----------|----------|--------|
//! |  768 |   1024 |    2 |      3072 |      260 | 11.8x  |
//! | 1024 |   1024 |    2 |      4096 |      260 | 15.8x  |
//! |  768 |   1024 |    4 |      3072 |      516 |  6.0x  |
//! | 1024 |   1024 |    4 |      4096 |      516 |  7.9x  |
//! |  768 |   1024 |    8 |      3072 |     1028 |  3.0x  |
//! | 1024 |   1024 |    8 |      4096 |     1028 |  4.0x  |
//!
//! # Example
//!
//! ```
//! use vortex_array::IntoArray;
//! use vortex_array::VortexSessionExecute;
//! use vortex_array::arrays::ExtensionArray;
//! use vortex_array::arrays::FixedSizeListArray;
//! use vortex_array::arrays::PrimitiveArray;
//! use vortex_array::extension::EmptyMetadata;
//! use vortex_array::session::ArraySession;
//! use vortex_array::validity::Validity;
//! use vortex_buffer::BufferMut;
//! use vortex_session::VortexSession;
//! use vortex_tensor::encodings::turboquant::{TurboQuantConfig, turboquant_encode};
//! use vortex_tensor::vector::Vector;
//!
//! // Create a Vector extension array of 100 random 128-d vectors.
//! let num_rows = 100;
//! let dim = 128u32;
//! let mut buf = BufferMut::<f32>::with_capacity(num_rows * dim as usize);
//! for i in 0..(num_rows * dim as usize) {
//!     buf.push((i as f32 * 0.001).sin());
//! }
//! let elements = PrimitiveArray::new::<f32>(buf.freeze(), Validity::NonNullable);
//! let fsl = FixedSizeListArray::try_new(
//!     elements.into_array(), dim, Validity::NonNullable, num_rows,
//! ).unwrap();
//! let vector = ExtensionArray::try_new_from_vtable(Vector, EmptyMetadata, fsl.into_array())
//!     .map(|ext| ext.into_array())
//!     .unwrap();
//!
//! // Normalize and quantize at 2 bits per coordinate in one pass.
//! let session = VortexSession::empty().with::<ArraySession>();
//! let mut ctx = session.create_execution_ctx();
//! let config = TurboQuantConfig { bit_width: 2, seed: 42, num_rounds: 3 };
//! let tq = turboquant_encode(vector, &config, &mut ctx).unwrap();
//!
//! // Verify compression: 100 vectors x 128 dims x 4 bytes = 51200 bytes input.
//! assert!(tq.nbytes() < 51200);
//! ```

pub(crate) mod centroids;
pub(crate) mod compress;

mod scheme;
pub use compress::TurboQuantConfig;
pub use compress::turboquant_encode;
pub use compress::turboquant_encode_unchecked;
pub use scheme::TurboQuantScheme;

/// Minimum vector dimension for TurboQuant encoding.
///
/// Note that this is not a theoretical minimum, it is mostly a practical one to limit the total
/// amount of distortion.
pub const MIN_DIMENSION: u32 = 128;

/// Maximum supported number of bits per quantized coordinate.
pub const MAX_BIT_WIDTH: u8 = 8;

/// Maximum supported number of centroids in the scalar quantizer codebook.
pub const MAX_CENTROIDS: usize = 1usize << (MAX_BIT_WIDTH as usize);

use vortex_array::dtype::DType;
use vortex_error::VortexResult;
use vortex_error::vortex_ensure;
use vortex_error::vortex_err;

use crate::types::vector::AnyVector;
use crate::types::vector::VectorMatcherMetadata;

/// Validates that `dtype` is a [`Vector`](crate::vector::Vector) extension type with
/// dimension >= [`MIN_DIMENSION`].
///
/// Returns the validated vector metadata on success.
pub fn tq_validate_vector_dtype(dtype: &DType) -> VortexResult<VectorMatcherMetadata> {
    let vector_metadata = dtype
        .as_extension_opt()
        .and_then(|ext| ext.metadata_opt::<AnyVector>())
        .ok_or_else(|| {
            vortex_err!("TurboQuant dtype must be a Vector extension type, got {dtype}")
        })?;

    let dimensions = vector_metadata.dimensions();
    vortex_ensure!(
        dimensions >= MIN_DIMENSION,
        "TurboQuant requires dimension >= {MIN_DIMENSION}, got {dimensions}",
    );

    Ok(vector_metadata)
}

#[cfg(test)]
mod tests;