Skip to main content

oxirs_vec/
compression_codecs.rs

1//! Codec implementations: run-length encoding, delta encoding,
2//! scalar quantization, product quantization, PCA compression.
3
4use super::compression_types::VectorCompressor;
5use crate::{Vector, VectorData, VectorError};
6use std::io::{Read, Write};
7
8// ─────────────────────────────────────────────────────────────────────────────
9// Euclidean distance helper (used by ProductQuantizer)
10// ─────────────────────────────────────────────────────────────────────────────
11
12pub(crate) fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
13    a.iter()
14        .zip(b.iter())
15        .map(|(&x, &y)| (x - y).powi(2))
16        .sum::<f32>()
17        .sqrt()
18}
19
20// ─────────────────────────────────────────────────────────────────────────────
21// ZstdCompressor
22// ─────────────────────────────────────────────────────────────────────────────
23
24/// Compressor backed by zstd (Pure Rust via oxiarc-zstd)
25pub struct ZstdCompressor {
26    pub(crate) level: i32,
27}
28
29impl ZstdCompressor {
30    pub fn new(level: i32) -> Self {
31        Self {
32            level: level.clamp(1, 22),
33        }
34    }
35}
36
37impl VectorCompressor for ZstdCompressor {
38    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
39        let bytes = vector_to_bytes(vector)?;
40        oxiarc_zstd::encode_all(&bytes, self.level)
41            .map_err(|e| VectorError::CompressionError(e.to_string()))
42    }
43
44    fn decompress(&self, data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
45        let decompressed = oxiarc_zstd::decode_all(data)
46            .map_err(|e| VectorError::CompressionError(e.to_string()))?;
47        bytes_to_vector(&decompressed, dimensions)
48    }
49
50    fn compression_ratio(&self) -> f32 {
51        match self.level {
52            1..=3 => 0.7,
53            4..=9 => 0.5,
54            10..=15 => 0.4,
55            16..=22 => 0.3,
56            _ => 1.0,
57        }
58    }
59}
60
61// ─────────────────────────────────────────────────────────────────────────────
62// ScalarQuantizer
63// ─────────────────────────────────────────────────────────────────────────────
64
65/// Uniform scalar quantization to N bits
66pub struct ScalarQuantizer {
67    pub(crate) bits: u8,
68    pub(crate) min_val: f32,
69    pub(crate) max_val: f32,
70}
71
72impl ScalarQuantizer {
73    pub fn new(bits: u8) -> Self {
74        Self {
75            bits: bits.clamp(1, 16),
76            min_val: 0.0,
77            max_val: 1.0,
78        }
79    }
80
81    pub fn with_range(bits: u8, min_val: f32, max_val: f32) -> Self {
82        Self {
83            bits: bits.clamp(1, 16),
84            min_val,
85            max_val,
86        }
87    }
88
89    pub fn train(&mut self, vectors: &[Vector]) -> Result<(), VectorError> {
90        if vectors.is_empty() {
91            return Err(VectorError::InvalidDimensions(
92                "No vectors to train on".to_string(),
93            ));
94        }
95
96        let mut min = f32::INFINITY;
97        let mut max = f32::NEG_INFINITY;
98
99        for vector in vectors {
100            match &vector.values {
101                VectorData::F32(v) => {
102                    for &val in v {
103                        min = min.min(val);
104                        max = max.max(val);
105                    }
106                }
107                VectorData::F64(v) => {
108                    for &val in v {
109                        min = min.min(val as f32);
110                        max = max.max(val as f32);
111                    }
112                }
113                _ => {}
114            }
115        }
116
117        self.min_val = min;
118        self.max_val = max;
119        Ok(())
120    }
121
122    pub(crate) fn quantize_value(&self, value: f32) -> u16 {
123        let normalized = ((value - self.min_val) / (self.max_val - self.min_val)).clamp(0.0, 1.0);
124        let max_quant_val = (1u32 << self.bits) - 1;
125        (normalized * max_quant_val as f32).round() as u16
126    }
127
128    pub(crate) fn dequantize_value(&self, quantized: u16) -> f32 {
129        let max_quant_val = (1u32 << self.bits) - 1;
130        let normalized = quantized as f32 / max_quant_val as f32;
131        normalized * (self.max_val - self.min_val) + self.min_val
132    }
133}
134
135impl VectorCompressor for ScalarQuantizer {
136    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
137        let values = match &vector.values {
138            VectorData::F32(v) => v.clone(),
139            VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
140            _ => {
141                return Err(VectorError::UnsupportedOperation(
142                    "Quantization only supports float vectors".to_string(),
143                ))
144            }
145        };
146
147        let mut compressed = Vec::new();
148
149        compressed.write_all(&self.bits.to_le_bytes())?;
150        compressed.write_all(&self.min_val.to_le_bytes())?;
151        compressed.write_all(&self.max_val.to_le_bytes())?;
152
153        if self.bits <= 8 {
154            for val in values {
155                let quantized = self.quantize_value(val) as u8;
156                compressed.push(quantized);
157            }
158        } else {
159            for val in values {
160                let quantized = self.quantize_value(val);
161                compressed.write_all(&quantized.to_le_bytes())?;
162            }
163        }
164
165        Ok(compressed)
166    }
167
168    fn decompress(&self, data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
169        let mut cursor = std::io::Cursor::new(data);
170
171        let mut bits_buf = [0u8; 1];
172        cursor.read_exact(&mut bits_buf)?;
173        let bits = bits_buf[0];
174
175        let mut min_buf = [0u8; 4];
176        cursor.read_exact(&mut min_buf)?;
177        let min_val = f32::from_le_bytes(min_buf);
178
179        let mut max_buf = [0u8; 4];
180        cursor.read_exact(&mut max_buf)?;
181        let max_val = f32::from_le_bytes(max_buf);
182
183        let quantizer = ScalarQuantizer {
184            bits,
185            min_val,
186            max_val,
187        };
188
189        let mut values = Vec::with_capacity(dimensions);
190
191        if bits <= 8 {
192            let mut buf = [0u8; 1];
193            for _ in 0..dimensions {
194                cursor.read_exact(&mut buf)?;
195                let quantized = buf[0] as u16;
196                values.push(quantizer.dequantize_value(quantized));
197            }
198        } else {
199            let mut buf = [0u8; 2];
200            for _ in 0..dimensions {
201                cursor.read_exact(&mut buf)?;
202                let quantized = u16::from_le_bytes(buf);
203                values.push(quantizer.dequantize_value(quantized));
204            }
205        }
206
207        Ok(Vector::new(values))
208    }
209
210    fn compression_ratio(&self) -> f32 {
211        self.bits as f32 / 32.0
212    }
213}
214
215// ─────────────────────────────────────────────────────────────────────────────
216// PcaCompressor
217// ─────────────────────────────────────────────────────────────────────────────
218
219/// PCA-based dimensionality-reduction compressor
220pub struct PcaCompressor {
221    pub(crate) components: usize,
222    pub(crate) mean: Vec<f32>,
223    pub(crate) components_matrix: Vec<Vec<f32>>,
224    pub(crate) explained_variance_ratio: Vec<f32>,
225}
226
227impl PcaCompressor {
228    pub fn new(components: usize) -> Self {
229        Self {
230            components,
231            mean: Vec::new(),
232            components_matrix: Vec::new(),
233            explained_variance_ratio: Vec::new(),
234        }
235    }
236
237    pub fn train(&mut self, vectors: &[Vector]) -> Result<(), VectorError> {
238        if vectors.is_empty() {
239            return Err(VectorError::InvalidDimensions(
240                "No vectors to train on".to_string(),
241            ));
242        }
243
244        let data: Vec<Vec<f32>> = vectors
245            .iter()
246            .map(|v| match &v.values {
247                VectorData::F32(vals) => Ok(vals.clone()),
248                VectorData::F64(vals) => Ok(vals.iter().map(|&x| x as f32).collect()),
249                _ => Err(VectorError::UnsupportedOperation(
250                    "PCA only supports float vectors".to_string(),
251                )),
252            })
253            .collect::<Result<Vec<_>, _>>()?;
254
255        let n_samples = data.len();
256        let n_features = data[0].len();
257
258        self.mean = vec![0.0; n_features];
259        for sample in &data {
260            for (i, &val) in sample.iter().enumerate() {
261                self.mean[i] += val;
262            }
263        }
264        for val in &mut self.mean {
265            *val /= n_samples as f32;
266        }
267
268        use nalgebra::DMatrix;
269
270        let training_data: Result<Vec<Vec<f32>>, _> = vectors
271            .iter()
272            .map(|v| match &v.values {
273                VectorData::F32(vals) => Ok(vals.clone()),
274                VectorData::F64(vals) => Ok(vals.iter().map(|&x| x as f32).collect()),
275                _ => Err(VectorError::UnsupportedOperation(
276                    "PCA only supports float vectors".to_string(),
277                )),
278            })
279            .collect();
280
281        let training_data = training_data?;
282        let n_samples = training_data.len();
283        if n_samples == 0 {
284            return Err(VectorError::InvalidDimensions(
285                "No training data provided for PCA".to_string(),
286            ));
287        }
288
289        let mut data_matrix = DMatrix::<f32>::zeros(n_samples, n_features);
290        for (i, sample) in training_data.iter().enumerate() {
291            for (j, &val) in sample.iter().enumerate() {
292                data_matrix[(i, j)] = val - self.mean[j];
293            }
294        }
295
296        let covariance = data_matrix.transpose() * &data_matrix / (n_samples as f32 - 1.0);
297        let svd = covariance.svd(true, true);
298        self.components_matrix = Vec::with_capacity(self.components);
299
300        if let Some(u) = svd.u {
301            let num_components = self.components.min(u.ncols());
302            let singular_values = &svd.singular_values;
303            let total_variance: f32 = singular_values.iter().sum();
304            let mut explained_variance = Vec::with_capacity(num_components);
305
306            for i in 0..num_components {
307                let component: Vec<f32> = u.column(i).iter().cloned().collect();
308                self.components_matrix.push(component);
309
310                let variance_ratio = singular_values[i] / total_variance;
311                explained_variance.push(variance_ratio);
312            }
313
314            self.explained_variance_ratio = explained_variance;
315        } else {
316            return Err(VectorError::CompressionError(
317                "SVD decomposition failed for PCA".to_string(),
318            ));
319        }
320
321        Ok(())
322    }
323
324    fn project(&self, vector: &[f32]) -> Vec<f32> {
325        let mut centered = vector.to_vec();
326        for (i, val) in centered.iter_mut().enumerate() {
327            *val -= self.mean.get(i).unwrap_or(&0.0);
328        }
329
330        let mut projected = vec![0.0; self.components];
331        for (i, component) in self.components_matrix.iter().enumerate() {
332            let mut dot = 0.0;
333            for (j, &val) in centered.iter().enumerate() {
334                dot += val * component.get(j).unwrap_or(&0.0);
335            }
336            projected[i] = dot;
337        }
338
339        projected
340    }
341
342    fn reconstruct(&self, projected: &[f32]) -> Vec<f32> {
343        let n_features = self.mean.len();
344        let mut reconstructed = self.mean.clone();
345
346        for (i, &coeff) in projected.iter().enumerate() {
347            if let Some(component) = self.components_matrix.get(i) {
348                for (j, &comp_val) in component.iter().enumerate() {
349                    if j < n_features {
350                        reconstructed[j] += coeff * comp_val;
351                    }
352                }
353            }
354        }
355
356        reconstructed
357    }
358
359    /// Get the explained variance ratio for each principal component
360    pub fn explained_variance_ratio(&self) -> &[f32] {
361        &self.explained_variance_ratio
362    }
363
364    /// Get the total explained variance for the selected components
365    pub fn total_explained_variance(&self) -> f32 {
366        self.explained_variance_ratio.iter().sum()
367    }
368}
369
370impl VectorCompressor for PcaCompressor {
371    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
372        let values = match &vector.values {
373            VectorData::F32(v) => v.clone(),
374            VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
375            _ => {
376                return Err(VectorError::UnsupportedOperation(
377                    "PCA only supports float vectors".to_string(),
378                ))
379            }
380        };
381
382        let projected = self.project(&values);
383
384        let mut compressed = Vec::new();
385        compressed.write_all(&(self.components as u32).to_le_bytes())?;
386
387        for val in projected {
388            compressed.write_all(&val.to_le_bytes())?;
389        }
390
391        Ok(compressed)
392    }
393
394    fn decompress(&self, data: &[u8], _dimensions: usize) -> Result<Vector, VectorError> {
395        let mut cursor = std::io::Cursor::new(data);
396
397        let mut components_buf = [0u8; 4];
398        cursor.read_exact(&mut components_buf)?;
399        let components = u32::from_le_bytes(components_buf) as usize;
400
401        let mut projected = Vec::with_capacity(components);
402        let mut val_buf = [0u8; 4];
403
404        for _ in 0..components {
405            cursor.read_exact(&mut val_buf)?;
406            projected.push(f32::from_le_bytes(val_buf));
407        }
408
409        let reconstructed = self.reconstruct(&projected);
410        Ok(Vector::new(reconstructed))
411    }
412
413    fn compression_ratio(&self) -> f32 {
414        if self.mean.is_empty() {
415            1.0
416        } else {
417            self.components as f32 / self.mean.len() as f32
418        }
419    }
420}
421
422// ─────────────────────────────────────────────────────────────────────────────
423// ProductQuantizer
424// ─────────────────────────────────────────────────────────────────────────────
425
426/// Product Quantization compressor for efficient vector compression
427pub struct ProductQuantizer {
428    pub(crate) subvectors: usize,
429    pub(crate) codebook_size: usize,
430    pub(crate) codebooks: Vec<Vec<Vec<f32>>>,
431    pub(crate) subvector_dim: usize,
432}
433
434impl ProductQuantizer {
435    pub fn new(subvectors: usize, codebook_size: usize) -> Self {
436        Self {
437            subvectors,
438            codebook_size,
439            codebooks: Vec::new(),
440            subvector_dim: 0,
441        }
442    }
443
444    pub fn train(&mut self, vectors: &[Vector]) -> Result<(), VectorError> {
445        if vectors.is_empty() {
446            return Err(VectorError::InvalidDimensions(
447                "No training data provided for Product Quantization".to_string(),
448            ));
449        }
450
451        let vector_dim = vectors[0].dimensions;
452        if vector_dim % self.subvectors != 0 {
453            return Err(VectorError::InvalidDimensions(format!(
454                "Vector dimension {} is not divisible by number of subvectors {}",
455                vector_dim, self.subvectors
456            )));
457        }
458
459        self.subvector_dim = vector_dim / self.subvectors;
460        self.codebooks = Vec::with_capacity(self.subvectors);
461
462        let training_data: Result<Vec<Vec<f32>>, _> = vectors
463            .iter()
464            .map(|v| match &v.values {
465                VectorData::F32(vals) => Ok(vals.clone()),
466                VectorData::F64(vals) => Ok(vals.iter().map(|&x| x as f32).collect()),
467                _ => Err(VectorError::UnsupportedOperation(
468                    "Product quantization only supports float vectors".to_string(),
469                )),
470            })
471            .collect();
472
473        let training_data = training_data?;
474
475        for subvec_idx in 0..self.subvectors {
476            let start_dim = subvec_idx * self.subvector_dim;
477            let end_dim = start_dim + self.subvector_dim;
478
479            let subvectors: Vec<Vec<f32>> = training_data
480                .iter()
481                .map(|v| v[start_dim..end_dim].to_vec())
482                .collect();
483
484            let codebook = self.train_codebook(&subvectors)?;
485            self.codebooks.push(codebook);
486        }
487
488        Ok(())
489    }
490
491    fn train_codebook(&self, subvectors: &[Vec<f32>]) -> Result<Vec<Vec<f32>>, VectorError> {
492        use scirs2_core::random::Random;
493        let mut rng = Random::seed(42);
494
495        if subvectors.is_empty() {
496            return Err(VectorError::InvalidDimensions(
497                "No subvectors to train codebook".to_string(),
498            ));
499        }
500
501        let dim = subvectors[0].len();
502        let mut centroids = Vec::with_capacity(self.codebook_size);
503
504        for _ in 0..self.codebook_size {
505            let mut centroid = vec![0.0; dim];
506            for val in &mut centroid {
507                *val = rng.gen_range(-1.0..1.0);
508            }
509            centroids.push(centroid);
510        }
511
512        for _ in 0..10 {
513            let mut assignments = vec![0; subvectors.len()];
514
515            for (i, subvec) in subvectors.iter().enumerate() {
516                let mut best_dist = f32::INFINITY;
517                let mut best_centroid = 0;
518
519                for (j, centroid) in centroids.iter().enumerate() {
520                    let dist = euclidean_distance(subvec, centroid);
521                    if dist < best_dist {
522                        best_dist = dist;
523                        best_centroid = j;
524                    }
525                }
526                assignments[i] = best_centroid;
527            }
528
529            for (j, centroid) in centroids.iter_mut().enumerate() {
530                let assigned_points: Vec<&Vec<f32>> = subvectors
531                    .iter()
532                    .enumerate()
533                    .filter(|(i, _)| assignments[*i] == j)
534                    .map(|(_, v)| v)
535                    .collect();
536
537                if !assigned_points.is_empty() {
538                    for (d, centroid_val) in centroid.iter_mut().enumerate() {
539                        *centroid_val = assigned_points.iter().map(|p| p[d]).sum::<f32>()
540                            / assigned_points.len() as f32;
541                    }
542                }
543            }
544        }
545
546        Ok(centroids)
547    }
548
549    pub(crate) fn quantize_vector(&self, vector: &[f32]) -> Result<Vec<u8>, VectorError> {
550        if vector.len() != self.subvectors * self.subvector_dim {
551            return Err(VectorError::InvalidDimensions(format!(
552                "Vector dimension {} doesn't match expected {}",
553                vector.len(),
554                self.subvectors * self.subvector_dim
555            )));
556        }
557
558        let mut codes = Vec::with_capacity(self.subvectors);
559
560        for subvec_idx in 0..self.subvectors {
561            let start_dim = subvec_idx * self.subvector_dim;
562            let end_dim = start_dim + self.subvector_dim;
563            let subvector = &vector[start_dim..end_dim];
564
565            let codebook = &self.codebooks[subvec_idx];
566            let mut best_dist = f32::INFINITY;
567            let mut best_code = 0u8;
568
569            for (code, centroid) in codebook.iter().enumerate() {
570                let dist = euclidean_distance(subvector, centroid);
571                if dist < best_dist {
572                    best_dist = dist;
573                    best_code = code as u8;
574                }
575            }
576
577            codes.push(best_code);
578        }
579
580        Ok(codes)
581    }
582
583    pub(crate) fn dequantize_codes(&self, codes: &[u8]) -> Result<Vec<f32>, VectorError> {
584        if codes.len() != self.subvectors {
585            return Err(VectorError::InvalidDimensions(format!(
586                "Code length {} doesn't match expected {}",
587                codes.len(),
588                self.subvectors
589            )));
590        }
591
592        let mut reconstructed = Vec::with_capacity(self.subvectors * self.subvector_dim);
593
594        for (subvec_idx, &code) in codes.iter().enumerate() {
595            let codebook = &self.codebooks[subvec_idx];
596            if (code as usize) < codebook.len() {
597                reconstructed.extend_from_slice(&codebook[code as usize]);
598            } else {
599                return Err(VectorError::InvalidDimensions(format!(
600                    "Invalid code {} for codebook of size {}",
601                    code,
602                    codebook.len()
603                )));
604            }
605        }
606
607        Ok(reconstructed)
608    }
609}
610
611impl VectorCompressor for ProductQuantizer {
612    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
613        let values = match &vector.values {
614            VectorData::F32(v) => v.clone(),
615            VectorData::F64(v) => v.iter().map(|&x| x as f32).collect(),
616            _ => {
617                return Err(VectorError::UnsupportedOperation(
618                    "Product quantization only supports float vectors".to_string(),
619                ))
620            }
621        };
622
623        let codes = self.quantize_vector(&values)?;
624
625        let mut compressed = Vec::new();
626        compressed.write_all(&(self.subvectors as u32).to_le_bytes())?;
627        compressed.write_all(&(self.codebook_size as u32).to_le_bytes())?;
628        compressed.write_all(&(self.subvector_dim as u32).to_le_bytes())?;
629        compressed.extend_from_slice(&codes);
630
631        Ok(compressed)
632    }
633
634    fn decompress(&self, data: &[u8], _dimensions: usize) -> Result<Vector, VectorError> {
635        if data.len() < 12 {
636            return Err(VectorError::InvalidData(
637                "Invalid compressed data format".to_string(),
638            ));
639        }
640
641        let subvectors = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
642        let codebook_size = u32::from_le_bytes([data[4], data[5], data[6], data[7]]) as usize;
643        let subvector_dim = u32::from_le_bytes([data[8], data[9], data[10], data[11]]) as usize;
644
645        if subvectors != self.subvectors
646            || codebook_size != self.codebook_size
647            || subvector_dim != self.subvector_dim
648        {
649            return Err(VectorError::InvalidData(
650                "Metadata mismatch in compressed data".to_string(),
651            ));
652        }
653
654        let codes = &data[12..];
655        if codes.len() != subvectors {
656            return Err(VectorError::InvalidData("Invalid code length".to_string()));
657        }
658
659        let values = self.dequantize_codes(codes)?;
660        Ok(Vector::new(values))
661    }
662
663    fn compression_ratio(&self) -> f32 {
664        (8.0 * self.subvectors as f32) / (32.0 * self.subvectors as f32 * self.subvector_dim as f32)
665    }
666}
667
668// ─────────────────────────────────────────────────────────────────────────────
669// NoOpCompressor (identity — used as fallback)
670// ─────────────────────────────────────────────────────────────────────────────
671
672pub(crate) struct NoOpCompressor;
673
674impl VectorCompressor for NoOpCompressor {
675    fn compress(&self, vector: &Vector) -> Result<Vec<u8>, VectorError> {
676        vector_to_bytes(vector)
677    }
678
679    fn decompress(&self, data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
680        bytes_to_vector(data, dimensions)
681    }
682
683    fn compression_ratio(&self) -> f32 {
684        1.0
685    }
686}
687
688// ─────────────────────────────────────────────────────────────────────────────
689// Byte-level helpers shared by several codecs
690// ─────────────────────────────────────────────────────────────────────────────
691
692pub fn vector_to_bytes(vector: &Vector) -> Result<Vec<u8>, VectorError> {
693    let mut bytes = Vec::new();
694
695    let type_byte = match &vector.values {
696        VectorData::F32(_) => 0u8,
697        VectorData::F64(_) => 1u8,
698        VectorData::F16(_) => 2u8,
699        VectorData::I8(_) => 3u8,
700        VectorData::Binary(_) => 4u8,
701    };
702    bytes.push(type_byte);
703
704    match &vector.values {
705        VectorData::F32(v) => {
706            for val in v {
707                bytes.write_all(&val.to_le_bytes())?;
708            }
709        }
710        VectorData::F64(v) => {
711            for val in v {
712                bytes.write_all(&val.to_le_bytes())?;
713            }
714        }
715        VectorData::F16(v) => {
716            for val in v {
717                bytes.write_all(&val.to_le_bytes())?;
718            }
719        }
720        VectorData::I8(v) => {
721            for &val in v {
722                bytes.push(val as u8);
723            }
724        }
725        VectorData::Binary(v) => {
726            bytes.extend_from_slice(v);
727        }
728    }
729
730    Ok(bytes)
731}
732
733pub fn bytes_to_vector(data: &[u8], dimensions: usize) -> Result<Vector, VectorError> {
734    if data.is_empty() {
735        return Err(VectorError::InvalidDimensions("Empty data".to_string()));
736    }
737
738    let type_byte = data[0];
739    let data = &data[1..];
740
741    match type_byte {
742        0 => {
743            let mut values = Vec::with_capacity(dimensions);
744            let mut cursor = std::io::Cursor::new(data);
745            let mut buf = [0u8; 4];
746
747            for _ in 0..dimensions {
748                cursor.read_exact(&mut buf)?;
749                values.push(f32::from_le_bytes(buf));
750            }
751            Ok(Vector::new(values))
752        }
753        1 => {
754            let mut values = Vec::with_capacity(dimensions);
755            let mut cursor = std::io::Cursor::new(data);
756            let mut buf = [0u8; 8];
757
758            for _ in 0..dimensions {
759                cursor.read_exact(&mut buf)?;
760                values.push(f64::from_le_bytes(buf));
761            }
762            Ok(Vector::f64(values))
763        }
764        2 => {
765            let mut values = Vec::with_capacity(dimensions);
766            let mut cursor = std::io::Cursor::new(data);
767            let mut buf = [0u8; 2];
768
769            for _ in 0..dimensions {
770                cursor.read_exact(&mut buf)?;
771                values.push(u16::from_le_bytes(buf));
772            }
773            Ok(Vector::f16(values))
774        }
775        3 => Ok(Vector::i8(
776            data[..dimensions].iter().map(|&b| b as i8).collect(),
777        )),
778        4 => Ok(Vector::binary(data[..dimensions].to_vec())),
779        _ => Err(VectorError::InvalidData(format!(
780            "Unknown vector type: {type_byte}"
781        ))),
782    }
783}