vectx_core/
point.rs

1use serde::{Deserialize, Serialize};
2use uuid::Uuid;
3use std::collections::HashMap;
4use crate::vector::Vector;
5use crate::multivector::MultiVector;
6
7/// Sparse vector with indices and values
8#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
9pub struct SparseVector {
10    /// Indices of non-zero elements
11    pub indices: Vec<u32>,
12    /// Values at those indices
13    pub values: Vec<f32>,
14}
15
16impl SparseVector {
17    /// Create a new sparse vector
18    pub fn new(indices: Vec<u32>, values: Vec<f32>) -> Self {
19        Self { indices, values }
20    }
21    
22    /// Compute dot product with another sparse vector
23    pub fn dot(&self, other: &SparseVector) -> f32 {
24        let mut result = 0.0f32;
25        
26        // Create a map of indices to values for efficient lookup
27        let other_map: HashMap<u32, f32> = other.indices.iter()
28            .zip(other.values.iter())
29            .map(|(&i, &v)| (i, v))
30            .collect();
31        
32        // Sum products for matching indices
33        for (&idx, &val) in self.indices.iter().zip(self.values.iter()) {
34            if let Some(&other_val) = other_map.get(&idx) {
35                result += val * other_val;
36            }
37        }
38        
39        result
40    }
41    
42    /// Check if empty
43    pub fn is_empty(&self) -> bool {
44        self.indices.is_empty()
45    }
46}
47
48/// Vector data - can be a single dense vector or a multivector (ColBERT-style)
49#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
50#[serde(untagged)]
51pub enum VectorData {
52    /// Single dense vector
53    Single(Vector),
54    /// Multiple vectors per point (ColBERT-style late interaction)
55    Multi(MultiVector),
56}
57
58impl VectorData {
59    /// Get dimension of the vector(s)
60    pub fn dim(&self) -> usize {
61        match self {
62            VectorData::Single(v) => v.dim(),
63            VectorData::Multi(mv) => mv.dim(),
64        }
65    }
66    
67    /// Check if this is a multivector
68    pub fn is_multi(&self) -> bool {
69        matches!(self, VectorData::Multi(_))
70    }
71    
72    /// Get as single vector (for backwards compatibility)
73    /// For multivector, returns the first sub-vector
74    pub fn as_single(&self) -> Vector {
75        match self {
76            VectorData::Single(v) => v.clone(),
77            VectorData::Multi(mv) => mv.to_single_vector(),
78        }
79    }
80    
81    /// Get as slice (for single vectors only)
82    pub fn as_slice(&self) -> &[f32] {
83        match self {
84            VectorData::Single(v) => v.as_slice(),
85            VectorData::Multi(mv) => mv.vectors().first().map(|v| v.as_slice()).unwrap_or(&[]),
86        }
87    }
88}
89
90impl From<Vector> for VectorData {
91    fn from(v: Vector) -> Self {
92        VectorData::Single(v)
93    }
94}
95
96impl From<MultiVector> for VectorData {
97    fn from(mv: MultiVector) -> Self {
98        VectorData::Multi(mv)
99    }
100}
101
102/// A point in the vector space with optional payload
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct Point {
105    pub id: PointId,
106    /// Version number - incremented on each update
107    #[serde(default)]
108    pub version: u64,
109    /// Vector data - backwards compatible field name
110    #[serde(alias = "vectors")]
111    pub vector: Vector,
112    /// Optional multivector data for ColBERT-style search
113    #[serde(skip_serializing_if = "Option::is_none")]
114    pub multivector: Option<MultiVector>,
115    /// Named sparse vectors (e.g., {"keywords": SparseVector})
116    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
117    pub sparse_vectors: HashMap<String, SparseVector>,
118    pub payload: Option<serde_json::Value>,
119}
120
121#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
122#[serde(untagged)]
123pub enum PointId {
124    String(String),
125    Uuid(Uuid),
126    Integer(u64),
127}
128
129impl std::fmt::Display for PointId {
130    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131        match self {
132            PointId::String(s) => write!(f, "{}", s),
133            PointId::Uuid(u) => write!(f, "{}", u),
134            PointId::Integer(i) => write!(f, "{}", i),
135        }
136    }
137}
138
139impl From<String> for PointId {
140    fn from(s: String) -> Self {
141        PointId::String(s)
142    }
143}
144
145impl From<u64> for PointId {
146    fn from(i: u64) -> Self {
147        PointId::Integer(i)
148    }
149}
150
151impl From<Uuid> for PointId {
152    fn from(u: Uuid) -> Self {
153        PointId::Uuid(u)
154    }
155}
156
157impl Point {
158    /// Create a new point with a single dense vector
159    #[inline]
160    #[must_use]
161    pub fn new(id: PointId, vector: Vector, payload: Option<serde_json::Value>) -> Self {
162        Self {
163            id,
164            version: 0,
165            vector,
166            multivector: None,
167            sparse_vectors: HashMap::new(),
168            payload,
169        }
170    }
171    
172    /// Create a new point with a multivector (ColBERT-style)
173    #[inline]
174    #[must_use]
175    pub fn new_multi(id: PointId, multivector: MultiVector, payload: Option<serde_json::Value>) -> Self {
176        // Store first sub-vector as the primary vector for backwards compatibility
177        let vector = multivector.to_single_vector();
178        Self {
179            id,
180            version: 0,
181            vector,
182            multivector: Some(multivector),
183            sparse_vectors: HashMap::new(),
184            payload,
185        }
186    }
187    
188    /// Create a new point with sparse vectors
189    #[inline]
190    #[must_use]
191    pub fn new_sparse(id: PointId, sparse_vectors: HashMap<String, SparseVector>, payload: Option<serde_json::Value>) -> Self {
192        Self {
193            id,
194            version: 0,
195            vector: Vector::new(vec![0.0]), // Placeholder for sparse-only points
196            multivector: None,
197            sparse_vectors,
198            payload,
199        }
200    }
201    
202    /// Add a sparse vector to this point
203    pub fn add_sparse_vector(&mut self, name: String, sparse: SparseVector) {
204        self.sparse_vectors.insert(name, sparse);
205    }
206    
207    /// Get a sparse vector by name
208    pub fn get_sparse_vector(&self, name: &str) -> Option<&SparseVector> {
209        self.sparse_vectors.get(name)
210    }
211    
212    /// Check if this point has multivector data
213    #[inline]
214    pub fn has_multivector(&self) -> bool {
215        self.multivector.is_some()
216    }
217    
218    /// Get the multivector if present
219    #[inline]
220    pub fn get_multivector(&self) -> Option<&MultiVector> {
221        self.multivector.as_ref()
222    }
223
224    #[inline]
225    #[must_use]
226    pub fn with_payload(mut self, payload: serde_json::Value) -> Self {
227        self.payload = Some(payload);
228        self
229    }
230    
231    #[inline]
232    #[must_use]
233    pub fn with_multivector(mut self, multivector: MultiVector) -> Self {
234        self.multivector = Some(multivector);
235        self
236    }
237}
238