Skip to main content

nodedb_types/
vector_dtype.rs

// SPDX-License-Identifier: Apache-2.0

//! Vector storage precision tag.
//!
//! Selects the on-disk + in-memory dtype for vector storage on a per-collection
//! basis. Independent of quantization (see [`crate::vector_ann::VectorQuantization`]):
//! a collection can be `(F32, None)`, `(BF16, None)`, `(F32, RaBitQ)`,
//! `(BF16, RaBitQ)`, etc. Storage dtype controls the durable form; quantization
//! is an optional search-time overlay on top.
10
11/// Vector storage dtype for HNSW + flat indexes.
12///
13/// `F32` is the default and the historical NodeDB storage form. `F16` and `BF16`
14/// give 2x memory + disk savings with negligible recall loss for typical
15/// embedding workloads, at the cost of a slightly more expensive distance kernel
16/// (F16/BF16 must up-convert to F32 for arithmetic on hardware without native
17/// half-precision FMA, e.g., pre-AVX-512-FP16 x86).
18///
19/// `FP8` (E4M3 / E5M2) is deliberately omitted from this release; it is rare in
20/// vector-search workloads relative to the conversion-surface cost of supporting
21/// it, and the recall hit on typical embeddings (1.5-bit-ish effective mantissa
22/// precision) is severe. Reconsider when there is concrete user demand.
23#[repr(u8)]
24#[derive(
25    Debug,
26    Clone,
27    Copy,
28    Default,
29    PartialEq,
30    Eq,
31    Hash,
32    serde::Serialize,
33    serde::Deserialize,
34    zerompk::ToMessagePack,
35    zerompk::FromMessagePack,
36)]
37#[msgpack(c_enum)]
38#[non_exhaustive]
39pub enum VectorStorageDtype {
40    /// 32-bit IEEE 754 single precision. Default; 4 bytes per dim.
41    #[default]
42    F32 = 0,
43    /// 16-bit IEEE 754 half precision. 2 bytes per dim. ~3 decimal digits of
44    /// precision; ~6e-5 to 65504 range.
45    F16 = 1,
46    /// 16-bit Brain Float (Google bfloat16). 2 bytes per dim. Same exponent
47    /// range as F32 (~1e-38 to 3.4e38) but only ~7-bit mantissa. Better
48    /// dynamic range than F16; preferred for embedding workloads.
49    BF16 = 2,
50}
51
52impl VectorStorageDtype {
53    /// Bytes occupied per vector dimension at this dtype.
54    pub const fn bytes_per_dim(self) -> usize {
55        match self {
56            Self::F32 => 4,
57            Self::F16 => 2,
58            Self::BF16 => 2,
59        }
60    }
61
62    /// Total bytes needed to store `dim`-dimensional vector in this dtype.
63    pub const fn bytes_for_dim(self, dim: usize) -> usize {
64        dim * self.bytes_per_dim()
65    }
66
67    /// Stable lowercase string identifier — used in DDL parsing
68    /// (`WITH (storage_dtype='bf16')`) and in error messages.
69    pub const fn as_str(self) -> &'static str {
70        match self {
71            Self::F32 => "f32",
72            Self::F16 => "f16",
73            Self::BF16 => "bf16",
74        }
75    }
76
77    /// Parse from the lowercase identifier. Returns `None` for unknown values;
78    /// the caller wraps that in a typed error (e.g., `NodeDbError::bad_request`)
79    /// with a precise message naming the offending value.
80    pub fn parse(s: &str) -> Option<Self> {
81        match s {
82            "f32" => Some(Self::F32),
83            "f16" => Some(Self::F16),
84            "bf16" => Some(Self::BF16),
85            _ => None,
86        }
87    }
88}
89
90impl core::str::FromStr for VectorStorageDtype {
91    type Err = ();
92
93    fn from_str(s: &str) -> Result<Self, Self::Err> {
94        Self::parse(s).ok_or(())
95    }
96}
97
98impl core::fmt::Display for VectorStorageDtype {
99    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
100        f.write_str(self.as_str())
101    }
102}
103
#[cfg(test)]
mod tests {
    use super::*;

    /// Every variant defined today; shared by the round-trip style tests so a
    /// new variant only has to be added in one place.
    const ALL: [VectorStorageDtype; 3] = [
        VectorStorageDtype::F32,
        VectorStorageDtype::F16,
        VectorStorageDtype::BF16,
    ];

    #[test]
    fn default_is_f32() {
        assert_eq!(VectorStorageDtype::default(), VectorStorageDtype::F32);
    }

    #[test]
    fn bytes_per_dim_matches_iec_widths() {
        assert_eq!(VectorStorageDtype::F32.bytes_per_dim(), 4);
        assert_eq!(VectorStorageDtype::F16.bytes_per_dim(), 2);
        assert_eq!(VectorStorageDtype::BF16.bytes_per_dim(), 2);
    }

    #[test]
    fn bytes_for_dim_is_dim_times_width() {
        assert_eq!(VectorStorageDtype::F32.bytes_for_dim(128), 512);
        assert_eq!(VectorStorageDtype::BF16.bytes_for_dim(1536), 3072);
        assert_eq!(VectorStorageDtype::F16.bytes_for_dim(256), 512);
        assert_eq!(VectorStorageDtype::F32.bytes_for_dim(0), 0);
    }

    #[test]
    fn as_str_roundtrips_from_str() {
        for v in ALL {
            // Inherent parser.
            assert_eq!(VectorStorageDtype::parse(v.as_str()), Some(v));
            // `FromStr` impl, reached through `str::parse`.
            assert_eq!(v.as_str().parse::<VectorStorageDtype>(), Ok(v));
        }
    }

    #[test]
    fn from_str_unknown_returns_none() {
        // Matching is exact: unsupported dtypes, wrong case, and the empty
        // string are all rejected.
        for bad in ["fp8", "F32", ""] {
            assert_eq!(VectorStorageDtype::parse(bad), None);
        }
    }

    #[test]
    fn from_str_trait_rejects_unknown() {
        // The trait path must reject exactly what `parse` rejects.
        for bad in ["fp8", "F32", ""] {
            assert_eq!(bad.parse::<VectorStorageDtype>(), Err(()));
        }
    }

    #[test]
    fn display_matches_as_str() {
        for v in ALL {
            assert_eq!(v.to_string(), v.as_str());
        }
    }

    #[test]
    fn msgpack_roundtrip() {
        for v in ALL {
            let bytes = zerompk::to_msgpack_vec(&v).unwrap();
            let restored: VectorStorageDtype = zerompk::from_msgpack(&bytes).unwrap();
            assert_eq!(restored, v);
        }
    }
}