manifoldb_vector/encoding/
inverted_keys.rs1use manifoldb_core::PointId;
20
21use super::point_keys::hash_name;
22
23pub const PREFIX_POSTING: u8 = 0x30;
25
26pub const PREFIX_INVERTED_META: u8 = 0x31;
28
29pub const PREFIX_POINT_TOKENS: u8 = 0x32;
31
32#[must_use]
40pub fn encode_posting_key(collection_name: &str, vector_name: &str, token_id: u32) -> Vec<u8> {
41 let mut key = Vec::with_capacity(21);
42 key.push(PREFIX_POSTING);
43 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
44 key.extend_from_slice(&hash_name(vector_name).to_be_bytes());
45 key.extend_from_slice(&token_id.to_be_bytes());
46 key
47}
48
49#[must_use]
51pub fn encode_posting_prefix(collection_name: &str, vector_name: &str) -> Vec<u8> {
52 let mut key = Vec::with_capacity(17);
53 key.push(PREFIX_POSTING);
54 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
55 key.extend_from_slice(&hash_name(vector_name).to_be_bytes());
56 key
57}
58
59#[must_use]
61pub fn encode_posting_collection_prefix(collection_name: &str) -> Vec<u8> {
62 let mut key = Vec::with_capacity(9);
63 key.push(PREFIX_POSTING);
64 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
65 key
66}
67
68#[derive(Debug, Clone, Copy, PartialEq, Eq)]
70pub struct PostingKey {
71 pub collection_name_hash: u64,
73 pub vector_name_hash: u64,
75 pub token_id: u32,
77}
78
79#[must_use]
83pub fn decode_posting_key(key: &[u8]) -> Option<PostingKey> {
84 if key.len() != 21 || key[0] != PREFIX_POSTING {
85 return None;
86 }
87 let collection_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
88 let vector_hash_bytes: [u8; 8] = key[9..17].try_into().ok()?;
89 let token_id_bytes: [u8; 4] = key[17..21].try_into().ok()?;
90 Some(PostingKey {
91 collection_name_hash: u64::from_be_bytes(collection_hash_bytes),
92 vector_name_hash: u64::from_be_bytes(vector_hash_bytes),
93 token_id: u32::from_be_bytes(token_id_bytes),
94 })
95}
96
97#[must_use]
105pub fn encode_inverted_meta_key(collection_name: &str, vector_name: &str) -> Vec<u8> {
106 let mut key = Vec::with_capacity(17);
107 key.push(PREFIX_INVERTED_META);
108 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
109 key.extend_from_slice(&hash_name(vector_name).to_be_bytes());
110 key
111}
112
113#[must_use]
115pub fn encode_inverted_meta_collection_prefix(collection_name: &str) -> Vec<u8> {
116 let mut key = Vec::with_capacity(9);
117 key.push(PREFIX_INVERTED_META);
118 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
119 key
120}
121
122#[derive(Debug, Clone, Copy, PartialEq, Eq)]
124pub struct InvertedMetaKey {
125 pub collection_name_hash: u64,
127 pub vector_name_hash: u64,
129}
130
131#[must_use]
135pub fn decode_inverted_meta_key(key: &[u8]) -> Option<InvertedMetaKey> {
136 if key.len() != 17 || key[0] != PREFIX_INVERTED_META {
137 return None;
138 }
139 let collection_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
140 let vector_hash_bytes: [u8; 8] = key[9..17].try_into().ok()?;
141 Some(InvertedMetaKey {
142 collection_name_hash: u64::from_be_bytes(collection_hash_bytes),
143 vector_name_hash: u64::from_be_bytes(vector_hash_bytes),
144 })
145}
146
147#[must_use]
155pub fn encode_point_tokens_key(
156 collection_name: &str,
157 vector_name: &str,
158 point_id: PointId,
159) -> Vec<u8> {
160 let mut key = Vec::with_capacity(25);
161 key.push(PREFIX_POINT_TOKENS);
162 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
163 key.extend_from_slice(&hash_name(vector_name).to_be_bytes());
164 key.extend_from_slice(&point_id.as_u64().to_be_bytes());
165 key
166}
167
168#[must_use]
170pub fn encode_point_tokens_prefix(collection_name: &str, vector_name: &str) -> Vec<u8> {
171 let mut key = Vec::with_capacity(17);
172 key.push(PREFIX_POINT_TOKENS);
173 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
174 key.extend_from_slice(&hash_name(vector_name).to_be_bytes());
175 key
176}
177
178#[must_use]
180pub fn encode_point_tokens_collection_prefix(collection_name: &str) -> Vec<u8> {
181 let mut key = Vec::with_capacity(9);
182 key.push(PREFIX_POINT_TOKENS);
183 key.extend_from_slice(&hash_name(collection_name).to_be_bytes());
184 key
185}
186
187#[derive(Debug, Clone, Copy, PartialEq, Eq)]
189pub struct PointTokensKey {
190 pub collection_name_hash: u64,
192 pub vector_name_hash: u64,
194 pub point_id: PointId,
196}
197
198#[must_use]
202pub fn decode_point_tokens_key(key: &[u8]) -> Option<PointTokensKey> {
203 if key.len() != 25 || key[0] != PREFIX_POINT_TOKENS {
204 return None;
205 }
206 let collection_hash_bytes: [u8; 8] = key[1..9].try_into().ok()?;
207 let vector_hash_bytes: [u8; 8] = key[9..17].try_into().ok()?;
208 let point_id_bytes: [u8; 8] = key[17..25].try_into().ok()?;
209 Some(PointTokensKey {
210 collection_name_hash: u64::from_be_bytes(collection_hash_bytes),
211 vector_name_hash: u64::from_be_bytes(vector_hash_bytes),
212 point_id: PointId::new(u64::from_be_bytes(point_id_bytes)),
213 })
214}
215
216#[cfg(test)]
217mod tests {
218 use super::*;
219
220 #[test]
221 fn posting_key_roundtrip() {
222 let collection = "documents";
223 let vector = "keywords";
224 let token_id = 12345u32;
225
226 let key = encode_posting_key(collection, vector, token_id);
227 assert_eq!(key.len(), 21);
228 assert_eq!(key[0], PREFIX_POSTING);
229
230 let decoded = decode_posting_key(&key).unwrap();
231 assert_eq!(decoded.collection_name_hash, hash_name(collection));
232 assert_eq!(decoded.vector_name_hash, hash_name(vector));
233 assert_eq!(decoded.token_id, token_id);
234 }
235
236 #[test]
237 fn posting_keys_sorted_by_token_id() {
238 let key1 = encode_posting_key("col", "vec", 1);
239 let key2 = encode_posting_key("col", "vec", 2);
240 let key3 = encode_posting_key("col", "vec", 100);
241
242 assert!(key1 < key2);
243 assert!(key2 < key3);
244 }
245
246 #[test]
247 fn posting_prefix_matches() {
248 let collection = "documents";
249 let vector = "keywords";
250 let prefix = encode_posting_prefix(collection, vector);
251
252 let key1 = encode_posting_key(collection, vector, 1);
253 let key2 = encode_posting_key(collection, vector, 1000);
254
255 assert!(key1.starts_with(&prefix));
256 assert!(key2.starts_with(&prefix));
257
258 let key_other = encode_posting_key(collection, "other_vector", 1);
260 assert!(!key_other.starts_with(&prefix));
261 }
262
263 #[test]
264 fn inverted_meta_key_roundtrip() {
265 let collection = "documents";
266 let vector = "keywords";
267
268 let key = encode_inverted_meta_key(collection, vector);
269 assert_eq!(key.len(), 17);
270 assert_eq!(key[0], PREFIX_INVERTED_META);
271
272 let decoded = decode_inverted_meta_key(&key).unwrap();
273 assert_eq!(decoded.collection_name_hash, hash_name(collection));
274 assert_eq!(decoded.vector_name_hash, hash_name(vector));
275 }
276
277 #[test]
278 fn point_tokens_key_roundtrip() {
279 let collection = "documents";
280 let vector = "keywords";
281 let point_id = PointId::new(42);
282
283 let key = encode_point_tokens_key(collection, vector, point_id);
284 assert_eq!(key.len(), 25);
285 assert_eq!(key[0], PREFIX_POINT_TOKENS);
286
287 let decoded = decode_point_tokens_key(&key).unwrap();
288 assert_eq!(decoded.collection_name_hash, hash_name(collection));
289 assert_eq!(decoded.vector_name_hash, hash_name(vector));
290 assert_eq!(decoded.point_id, point_id);
291 }
292
293 #[test]
294 fn decode_invalid_keys() {
295 assert!(decode_posting_key(&[PREFIX_INVERTED_META; 21]).is_none());
297 assert!(decode_inverted_meta_key(&[PREFIX_POSTING; 17]).is_none());
298 assert!(decode_point_tokens_key(&[PREFIX_POSTING; 25]).is_none());
299
300 assert!(decode_posting_key(&[PREFIX_POSTING; 10]).is_none());
302 assert!(decode_inverted_meta_key(&[PREFIX_INVERTED_META; 10]).is_none());
303 assert!(decode_point_tokens_key(&[PREFIX_POINT_TOKENS; 10]).is_none());
304
305 assert!(decode_posting_key(&[]).is_none());
307 assert!(decode_inverted_meta_key(&[]).is_none());
308 assert!(decode_point_tokens_key(&[]).is_none());
309 }
310}