velesdb_core/
half_precision.rs

1//! Half-precision floating point support for memory-efficient vector storage.
2//!
3//! This module provides f16 (IEEE 754 half-precision) and bf16 (bfloat16) support,
4//! reducing memory usage by 50% compared to f32 with minimal precision loss.
5//!
6//! # Memory Savings
7//!
8//! | Dimension | f32 Size | f16 Size | Savings |
9//! |-----------|----------|----------|---------|
10//! | 768 (BERT)| 3.0 KB   | 1.5 KB   | 50%     |
11//! | 1536 (GPT)| 6.0 KB   | 3.0 KB   | 50%     |
12//! | 4096      | 16.0 KB  | 8.0 KB   | 50%     |
13//!
14//! # Format Comparison
15//!
16//! - **f16**: IEEE 754 half-precision, best general compatibility
17//! - **bf16**: Brain float16, same exponent range as f32, better for ML
18//!
19//! # Usage
20//!
21//! ```rust
22//! use velesdb_core::half_precision::{VectorData, VectorPrecision};
23//!
24//! // Create from f32
25//! let v = VectorData::from_f32_slice(&[0.1, 0.2, 0.3], VectorPrecision::F16);
26//!
27//! // Convert back to f32 for calculations
28//! let f32_vec = v.to_f32_vec();
29//! ```
30
31use half::{bf16, f16};
32use serde::{Deserialize, Serialize};
33
34/// Vector precision format.
35#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
36pub enum VectorPrecision {
37    /// 32-bit floating point (4 bytes per dimension)
38    #[default]
39    F32,
40    /// 16-bit floating point IEEE 754 (2 bytes per dimension)
41    F16,
42    /// Brain float 16-bit (2 bytes per dimension, same exponent as f32)
43    BF16,
44}
45
46impl VectorPrecision {
47    /// Returns the size in bytes per dimension.
48    #[must_use]
49    pub const fn bytes_per_element(&self) -> usize {
50        match self {
51            Self::F32 => 4,
52            Self::F16 | Self::BF16 => 2,
53        }
54    }
55
56    /// Calculates total memory for a vector of given dimension.
57    #[must_use]
58    pub const fn memory_size(&self, dimension: usize) -> usize {
59        self.bytes_per_element() * dimension
60    }
61}
62
/// Vector data supporting multiple precision formats.
///
/// Stores each vector in its native precision format to minimize memory
/// usage (F16/BF16 halve the footprint relative to F32). Use
/// [`VectorData::to_f32_vec`] to obtain f32 values for distance
/// calculations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum VectorData {
    /// Full precision f32 vector (4 bytes per element)
    F32(Vec<f32>),
    /// Half precision IEEE 754 f16 vector (2 bytes per element, 50% memory reduction)
    F16(Vec<f16>),
    /// Brain float bf16 vector (2 bytes per element; f32 exponent range, ML-optimized)
    BF16(Vec<bf16>),
}
76
77impl VectorData {
78    /// Creates a new `VectorData` from an f32 slice with the specified precision.
79    ///
80    /// # Arguments
81    ///
82    /// * `data` - Source f32 data
83    /// * `precision` - Target precision format
84    ///
85    /// # Example
86    ///
87    /// ```
88    /// use velesdb_core::half_precision::{VectorData, VectorPrecision};
89    ///
90    /// let v = VectorData::from_f32_slice(&[0.1, 0.2, 0.3], VectorPrecision::F16);
91    /// assert_eq!(v.len(), 3);
92    /// ```
93    #[must_use]
94    pub fn from_f32_slice(data: &[f32], precision: VectorPrecision) -> Self {
95        match precision {
96            VectorPrecision::F32 => Self::F32(data.to_vec()),
97            VectorPrecision::F16 => Self::F16(data.iter().map(|&x| f16::from_f32(x)).collect()),
98            VectorPrecision::BF16 => Self::BF16(data.iter().map(|&x| bf16::from_f32(x)).collect()),
99        }
100    }
101
102    /// Creates a new `VectorData` from an f32 vec, taking ownership.
103    #[must_use]
104    pub fn from_f32_vec(data: Vec<f32>, precision: VectorPrecision) -> Self {
105        match precision {
106            VectorPrecision::F32 => Self::F32(data),
107            VectorPrecision::F16 => Self::F16(data.iter().map(|&x| f16::from_f32(x)).collect()),
108            VectorPrecision::BF16 => Self::BF16(data.iter().map(|&x| bf16::from_f32(x)).collect()),
109        }
110    }
111
112    /// Returns the precision of this vector.
113    #[must_use]
114    pub const fn precision(&self) -> VectorPrecision {
115        match self {
116            Self::F32(_) => VectorPrecision::F32,
117            Self::F16(_) => VectorPrecision::F16,
118            Self::BF16(_) => VectorPrecision::BF16,
119        }
120    }
121
122    /// Returns the dimension (length) of the vector.
123    #[must_use]
124    pub fn len(&self) -> usize {
125        match self {
126            Self::F32(v) => v.len(),
127            Self::F16(v) => v.len(),
128            Self::BF16(v) => v.len(),
129        }
130    }
131
132    /// Returns true if the vector is empty.
133    #[must_use]
134    pub fn is_empty(&self) -> bool {
135        self.len() == 0
136    }
137
138    /// Returns the memory size in bytes.
139    #[must_use]
140    pub fn memory_size(&self) -> usize {
141        self.precision().memory_size(self.len())
142    }
143
144    /// Converts the vector to f32 for calculations.
145    ///
146    /// For F32 vectors, this clones the data.
147    /// For F16/BF16 vectors, this converts each element.
148    #[must_use]
149    pub fn to_f32_vec(&self) -> Vec<f32> {
150        match self {
151            Self::F32(v) => v.clone(),
152            Self::F16(v) => v.iter().map(|x| x.to_f32()).collect(),
153            Self::BF16(v) => v.iter().map(|x| x.to_f32()).collect(),
154        }
155    }
156
157    /// Returns a reference to the underlying f32 data if precision is F32.
158    ///
159    /// Returns `None` for F16/BF16 vectors.
160    #[must_use]
161    pub fn as_f32_slice(&self) -> Option<&[f32]> {
162        match self {
163            Self::F32(v) => Some(v.as_slice()),
164            Self::F16(_) | Self::BF16(_) => None,
165        }
166    }
167
168    /// Converts to another precision format.
169    #[must_use]
170    pub fn convert(&self, target: VectorPrecision) -> Self {
171        if self.precision() == target {
172            return self.clone();
173        }
174        Self::from_f32_slice(&self.to_f32_vec(), target)
175    }
176}
177
178impl From<Vec<f32>> for VectorData {
179    fn from(data: Vec<f32>) -> Self {
180        Self::F32(data)
181    }
182}
183
184impl From<&[f32]> for VectorData {
185    fn from(data: &[f32]) -> Self {
186        Self::F32(data.to_vec())
187    }
188}
189
190// =============================================================================
191// Distance calculations for half-precision vectors
192// =============================================================================
193
194/// Computes dot product between two `VectorData` with optimal precision handling.
195///
196/// For F32 vectors, uses SIMD-optimized f32 path.
197/// For F16/BF16 vectors, converts to f32 on the fly without allocation.
198#[must_use]
199pub fn dot_product(a: &VectorData, b: &VectorData) -> f32 {
200    use crate::simd_avx512::dot_product_auto;
201
202    match (a, b) {
203        (VectorData::F32(va), VectorData::F32(vb)) => dot_product_auto(va, vb),
204        (VectorData::F32(va), VectorData::F16(vb)) => {
205            va.iter().zip(vb.iter()).map(|(&x, y)| x * y.to_f32()).sum()
206        }
207        (VectorData::F16(va), VectorData::F32(vb)) => {
208            va.iter().zip(vb.iter()).map(|(x, &y)| x.to_f32() * y).sum()
209        }
210        (VectorData::F16(va), VectorData::F16(vb)) => va
211            .iter()
212            .zip(vb.iter())
213            .map(|(x, y)| x.to_f32() * y.to_f32())
214            .sum(),
215        (VectorData::F32(va), VectorData::BF16(vb)) => {
216            va.iter().zip(vb.iter()).map(|(&x, y)| x * y.to_f32()).sum()
217        }
218        (VectorData::BF16(va), VectorData::F32(vb)) => {
219            va.iter().zip(vb.iter()).map(|(x, &y)| x.to_f32() * y).sum()
220        }
221        (VectorData::BF16(va), VectorData::BF16(vb)) => va
222            .iter()
223            .zip(vb.iter())
224            .map(|(x, y)| x.to_f32() * y.to_f32())
225            .sum(),
226        // Fallback for mixed F16/BF16 (rare)
227        _ => {
228            let va = a.to_f32_vec();
229            let vb = b.to_f32_vec();
230            dot_product_auto(&va, &vb)
231        }
232    }
233}
234
235/// Computes cosine similarity between two `VectorData`.
236#[must_use]
237pub fn cosine_similarity(a: &VectorData, b: &VectorData) -> f32 {
238    use crate::simd_avx512::cosine_similarity_auto;
239
240    if let (VectorData::F32(va), VectorData::F32(vb)) = (a, b) {
241        cosine_similarity_auto(va, vb)
242    } else {
243        let dot = dot_product(a, b);
244        let norm_a = norm_squared(a).sqrt();
245        let norm_b = norm_squared(b).sqrt();
246
247        if norm_a < f32::EPSILON || norm_b < f32::EPSILON {
248            0.0
249        } else {
250            dot / (norm_a * norm_b)
251        }
252    }
253}
254
255/// Computes Euclidean distance between two `VectorData`.
256#[must_use]
257pub fn euclidean_distance(a: &VectorData, b: &VectorData) -> f32 {
258    use crate::simd_avx512::euclidean_auto;
259
260    match (a, b) {
261        (VectorData::F32(va), VectorData::F32(vb)) => euclidean_auto(va, vb),
262        (VectorData::F32(va), VectorData::F16(vb)) => va
263            .iter()
264            .zip(vb.iter())
265            .map(|(&x, y)| (x - y.to_f32()).powi(2))
266            .sum::<f32>()
267            .sqrt(),
268        (VectorData::F16(va), VectorData::F32(vb)) => va
269            .iter()
270            .zip(vb.iter())
271            .map(|(x, &y)| (x.to_f32() - y).powi(2))
272            .sum::<f32>()
273            .sqrt(),
274        (VectorData::F16(va), VectorData::F16(vb)) => va
275            .iter()
276            .zip(vb.iter())
277            .map(|(x, y)| (x.to_f32() - y.to_f32()).powi(2))
278            .sum::<f32>()
279            .sqrt(),
280        // Fallback for others
281        _ => {
282            let va = a.to_f32_vec();
283            let vb = b.to_f32_vec();
284            euclidean_auto(&va, &vb)
285        }
286    }
287}
288
289/// Helper to compute squared L2 norm without allocation
290fn norm_squared(v: &VectorData) -> f32 {
291    match v {
292        VectorData::F32(data) => data.iter().map(|&x| x * x).sum(),
293        VectorData::F16(data) => data
294            .iter()
295            .map(|x| {
296                let f = x.to_f32();
297                f * f
298            })
299            .sum(),
300        VectorData::BF16(data) => data
301            .iter()
302            .map(|x| {
303                let f = x.to_f32();
304                f * f
305            })
306            .sum(),
307    }
308}