Skip to main content

clark_hash/
quantized.rs

1use crate::bitpack::PackedCodes;
2
3/// A quantized database-side sketch.
4///
5/// The codes are bit-packed and can be scored asymmetrically against a
6/// [`QuerySketch`].
7#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
8#[derive(Debug, Clone, PartialEq)]
9pub struct QuantizedVector {
10    pub(crate) sketch_dim: usize,
11    pub(crate) bits: u8,
12    pub(crate) codes: PackedCodes,
13    pub(crate) encoded_norm: Option<u16>,
14}
15
16impl QuantizedVector {
17    /// Returns the number of sketch coordinates.
18    pub fn sketch_dim(&self) -> usize {
19        self.sketch_dim
20    }
21
22    /// Returns the number of bits used per coordinate.
23    pub fn bits(&self) -> u8 {
24        self.bits
25    }
26
27    /// Returns the packed bytes that store the quantized sketch.
28    pub fn packed_bytes(&self) -> &[u8] {
29        self.codes.bytes()
30    }
31
32    /// Returns the optional encoded norm channel.
33    pub fn encoded_norm(&self) -> Option<u16> {
34        self.encoded_norm
35    }
36
37    /// Returns the total number of bytes used by this code.
38    pub fn storage_bytes(&self) -> usize {
39        self.codes.bytes().len() + usize::from(self.encoded_norm.is_some()) * 2
40    }
41}
42
/// The query-side half of an asymmetric comparison: a sketch kept in
/// floating point rather than quantized.
#[derive(Debug, Clone, PartialEq)]
pub struct QuerySketch {
    /// Floating-point sketch coordinates; the length is the sketch dimension.
    pub(crate) values: Vec<f32>,
    /// L2 norm of the original unnormalized query embedding.
    pub(crate) input_norm: f32,
}

impl QuerySketch {
    /// Borrows the sketch coordinates as a slice.
    pub fn values(&self) -> &[f32] {
        self.values.as_slice()
    }

    /// The L2 norm of the original unnormalized query embedding.
    pub fn input_norm(&self) -> f32 {
        let Self { input_norm, .. } = self;
        *input_norm
    }

    /// The sketch dimension, i.e. the number of stored coordinates.
    pub fn sketch_dim(&self) -> usize {
        self.values().len()
    }
}
65}