1pub mod centroid_chunk;
6pub mod centroid_stats;
7pub mod collection_meta;
8pub mod deletions;
9pub mod id_dictionary;
10pub mod key;
11pub mod metadata_index;
12pub mod posting_list;
13pub mod vector_bitmap;
14pub mod vector_data;
15
16use bytes::BytesMut;
17
18pub use common::serde::encoding::{
20 EncodingError, decode_optional_utf8, decode_utf8, encode_optional_utf8, encode_utf8,
21};
22use common::serde::key_prefix::{KeyPrefix, RecordTag};
23
/// Version byte that [`RecordType::prefix`] passes to `KeyPrefix::new` when
/// building the prefix for a record key.
pub const KEY_VERSION: u8 = 0x01;
26
/// Discriminator for the kinds of records handled by this module.
///
/// Every variant has an explicit one-byte id. The enum is `#[repr(u8)]` so the
/// `as u8` cast used to obtain that id is backed by a guaranteed one-byte
/// representation, matching how [`FieldType`] is declared.
///
/// The ids are mapped back to variants by `RecordType::from_id`; a variant
/// added here must be added there as well.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum RecordType {
    CollectionMeta = 0x01,
    Deletions = 0x02,
    CentroidChunk = 0x03,
    PostingList = 0x04,
    IdDictionary = 0x05,
    VectorData = 0x06,
    MetadataIndex = 0x07,
    SeqBlock = 0x08,
    CentroidStats = 0x09,
}
40
41impl RecordType {
42 pub fn id(&self) -> u8 {
44 *self as u8
45 }
46
47 pub fn from_id(id: u8) -> Result<Self, EncodingError> {
49 match id {
50 0x01 => Ok(RecordType::CollectionMeta),
51 0x02 => Ok(RecordType::Deletions),
52 0x03 => Ok(RecordType::CentroidChunk),
53 0x04 => Ok(RecordType::PostingList),
54 0x05 => Ok(RecordType::IdDictionary),
55 0x06 => Ok(RecordType::VectorData),
56 0x07 => Ok(RecordType::MetadataIndex),
57 0x08 => Ok(RecordType::SeqBlock),
58 0x09 => Ok(RecordType::CentroidStats),
59 _ => Err(EncodingError {
60 message: format!("Invalid record type: 0x{:02x}", id),
61 }),
62 }
63 }
64
65 pub fn tag(&self) -> RecordTag {
67 RecordTag::new(self.id(), 0)
68 }
69
70 pub fn prefix(&self) -> KeyPrefix {
72 KeyPrefix::new(KEY_VERSION, self.tag())
73 }
74}
75
76pub fn record_type_from_tag(tag: RecordTag) -> Result<RecordType, EncodingError> {
78 RecordType::from_id(tag.record_type())
79}
80
/// Ties an implementing type to one [`RecordType`].
///
/// NOTE(review): presumably implemented by the per-record key types in the
/// submodules — confirm against the implementors.
pub trait RecordKey {
    /// The record type associated with the implementing type.
    const RECORD_TYPE: RecordType;
}
85
/// One-byte type tag written in front of an encoded [`FieldValue`].
///
/// The discriminants are the bytes accepted by [`FieldType::from_byte`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[repr(u8)]
pub enum FieldType {
    /// Tag for [`FieldValue::String`].
    String = 0,
    /// Tag for [`FieldValue::Int64`].
    Int64 = 1,
    /// Tag for [`FieldValue::Float64`].
    Float64 = 2,
    /// Tag for [`FieldValue::Bool`].
    Bool = 3,
    /// Tag for [`FieldValue::Vector`].
    Vector = 255,
}
103
104impl FieldType {
105 pub fn from_byte(byte: u8) -> Result<Self, EncodingError> {
106 match byte {
107 0 => Ok(FieldType::String),
108 1 => Ok(FieldType::Int64),
109 2 => Ok(FieldType::Float64),
110 3 => Ok(FieldType::Bool),
111 255 => Ok(FieldType::Vector),
112 _ => Err(EncodingError {
113 message: format!("Invalid field type: {}", byte),
114 }),
115 }
116 }
117}
118
/// A typed field value; each variant pairs with the [`FieldType`] of the same
/// name.
#[derive(Debug, Clone, PartialEq)]
pub enum FieldValue {
    /// UTF-8 text.
    String(String),
    /// Signed 64-bit integer.
    Int64(i64),
    /// 64-bit floating-point number.
    Float64(f64),
    /// Boolean flag.
    Bool(bool),
    /// Sequence of `f32` components. Not supported by the sortable encoding.
    Vector(Vec<f32>),
}
140
141impl FieldValue {
142 pub fn field_type(&self) -> FieldType {
144 match self {
145 FieldValue::String(_) => FieldType::String,
146 FieldValue::Int64(_) => FieldType::Int64,
147 FieldValue::Float64(_) => FieldType::Float64,
148 FieldValue::Bool(_) => FieldType::Bool,
149 FieldValue::Vector(_) => FieldType::Vector,
150 }
151 }
152
153 pub fn encode_sortable(&self, buf: &mut BytesMut) {
168 use bytes::BufMut;
169 use common::serde::sortable::{encode_f64_sortable, encode_i64_sortable};
170 use common::serde::terminated_bytes;
171
172 match self {
173 FieldValue::String(s) => {
174 buf.put_u8(FieldType::String as u8);
175 terminated_bytes::serialize(s.as_bytes(), buf);
176 }
177 FieldValue::Int64(v) => {
178 buf.put_u8(FieldType::Int64 as u8);
179 buf.put_u64(encode_i64_sortable(*v));
180 }
181 FieldValue::Float64(v) => {
182 buf.put_u8(FieldType::Float64 as u8);
183 buf.put_u64(encode_f64_sortable(*v));
184 }
185 FieldValue::Bool(v) => {
186 buf.put_u8(FieldType::Bool as u8);
187 buf.put_u8(if *v { 1 } else { 0 });
188 }
189 FieldValue::Vector(_) => {
190 panic!("Vector values cannot be used in sortable key encoding")
191 }
192 }
193 }
194
195 pub fn decode_sortable(buf: &mut &[u8]) -> Result<Self, EncodingError> {
201 use common::serde::sortable::{decode_f64_sortable, decode_i64_sortable};
202 use common::serde::terminated_bytes;
203
204 if buf.is_empty() {
205 return Err(EncodingError {
206 message: "Buffer too short for FieldValue type".to_string(),
207 });
208 }
209
210 let field_type = FieldType::from_byte(buf[0])?;
211 *buf = &buf[1..];
212
213 match field_type {
214 FieldType::String => {
215 let bytes = terminated_bytes::deserialize(buf).map_err(|e| EncodingError {
216 message: format!("Failed to decode string value: {}", e),
217 })?;
218 let s = String::from_utf8(bytes.to_vec()).map_err(|e| EncodingError {
219 message: format!("Invalid UTF-8 in string value: {}", e),
220 })?;
221 Ok(FieldValue::String(s))
222 }
223 FieldType::Int64 => {
224 if buf.len() < 8 {
225 return Err(EncodingError {
226 message: "Buffer too short for Int64 value".to_string(),
227 });
228 }
229 let sortable = u64::from_be_bytes([
230 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
231 ]);
232 *buf = &buf[8..];
233 Ok(FieldValue::Int64(decode_i64_sortable(sortable)))
234 }
235 FieldType::Float64 => {
236 if buf.len() < 8 {
237 return Err(EncodingError {
238 message: "Buffer too short for Float64 value".to_string(),
239 });
240 }
241 let sortable = u64::from_be_bytes([
242 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
243 ]);
244 *buf = &buf[8..];
245 Ok(FieldValue::Float64(decode_f64_sortable(sortable)))
246 }
247 FieldType::Bool => {
248 if buf.is_empty() {
249 return Err(EncodingError {
250 message: "Buffer too short for Bool value".to_string(),
251 });
252 }
253 let value = buf[0] != 0;
254 *buf = &buf[1..];
255 Ok(FieldValue::Bool(value))
256 }
257 FieldType::Vector => Err(EncodingError {
258 message: "Vector values cannot be used in sortable key encoding".to_string(),
259 }),
260 }
261 }
262}
263
/// Types that can serialize themselves into a byte buffer.
pub trait Encode {
    /// Appends the encoded form of `self` to `buf`.
    fn encode(&self, buf: &mut BytesMut);
}
268
/// Types that can be parsed from the front of a byte slice.
pub trait Decode: Sized {
    /// Decodes one value from the start of `buf`; the implementations in this
    /// file advance `buf` past the bytes they consume and return an
    /// [`EncodingError`] when the input is too short or invalid.
    fn decode(buf: &mut &[u8]) -> Result<Self, EncodingError>;
}
273
274pub fn encode_array<T: Encode>(items: &[T], buf: &mut BytesMut) {
278 let count = items.len();
279 if count > u16::MAX as usize {
280 panic!("Array too long: {} items", count);
281 }
282 buf.extend_from_slice(&(count as u16).to_le_bytes());
283 for item in items {
284 item.encode(buf);
285 }
286}
287
288pub fn decode_array<T: Decode>(buf: &mut &[u8]) -> Result<Vec<T>, EncodingError> {
292 if buf.len() < 2 {
293 return Err(EncodingError {
294 message: "Buffer too short for array count".to_string(),
295 });
296 }
297 let count = u16::from_le_bytes([buf[0], buf[1]]) as usize;
298 *buf = &buf[2..];
299
300 let mut items = Vec::with_capacity(count);
301 for _ in 0..count {
302 items.push(T::decode(buf)?);
303 }
304 Ok(items)
305}
306
307pub fn encode_fixed_element_array<T: Encode>(items: &[T], buf: &mut BytesMut) {
311 for item in items {
312 item.encode(buf);
313 }
314}
315
316pub fn decode_fixed_element_array<T: Decode>(
321 buf: &mut &[u8],
322 element_size: usize,
323) -> Result<Vec<T>, EncodingError> {
324 if !buf.len().is_multiple_of(element_size) {
325 return Err(EncodingError {
326 message: format!(
327 "Buffer length {} is not divisible by element size {}",
328 buf.len(),
329 element_size
330 ),
331 });
332 }
333
334 let count = buf.len() / element_size;
335 let mut items = Vec::with_capacity(count);
336 for _ in 0..count {
337 items.push(T::decode(buf)?);
338 }
339 Ok(items)
340}
341
342impl Encode for f32 {
345 fn encode(&self, buf: &mut BytesMut) {
346 buf.extend_from_slice(&self.to_le_bytes());
347 }
348}
349
350impl Decode for f32 {
351 fn decode(buf: &mut &[u8]) -> Result<Self, EncodingError> {
352 if buf.len() < 4 {
353 return Err(EncodingError {
354 message: "Buffer too short for f32".to_string(),
355 });
356 }
357 let value = f32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]);
358 *buf = &buf[4..];
359 Ok(value)
360 }
361}
362
363impl Encode for u8 {
364 fn encode(&self, buf: &mut BytesMut) {
365 buf.extend_from_slice(&[*self]);
366 }
367}
368
369impl Decode for u8 {
370 fn decode(buf: &mut &[u8]) -> Result<Self, EncodingError> {
371 if buf.is_empty() {
372 return Err(EncodingError {
373 message: "Buffer too short for u8".to_string(),
374 });
375 }
376 let value = buf[0];
377 *buf = &buf[1..];
378 Ok(value)
379 }
380}
381
382impl Encode for u16 {
383 fn encode(&self, buf: &mut BytesMut) {
384 buf.extend_from_slice(&self.to_le_bytes());
385 }
386}
387
388impl Decode for u16 {
389 fn decode(buf: &mut &[u8]) -> Result<Self, EncodingError> {
390 if buf.len() < 2 {
391 return Err(EncodingError {
392 message: "Buffer too short for u16".to_string(),
393 });
394 }
395 let value = u16::from_le_bytes([buf[0], buf[1]]);
396 *buf = &buf[2..];
397 Ok(value)
398 }
399}
400
401impl Encode for u32 {
402 fn encode(&self, buf: &mut BytesMut) {
403 buf.extend_from_slice(&self.to_le_bytes());
404 }
405}
406
407impl Decode for u32 {
408 fn decode(buf: &mut &[u8]) -> Result<Self, EncodingError> {
409 if buf.len() < 4 {
410 return Err(EncodingError {
411 message: "Buffer too short for u32".to_string(),
412 });
413 }
414 let value = u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]);
415 *buf = &buf[4..];
416 Ok(value)
417 }
418}
419
420impl Encode for u64 {
421 fn encode(&self, buf: &mut BytesMut) {
422 buf.extend_from_slice(&self.to_le_bytes());
423 }
424}
425
426impl Decode for u64 {
427 fn decode(buf: &mut &[u8]) -> Result<Self, EncodingError> {
428 if buf.len() < 8 {
429 return Err(EncodingError {
430 message: "Buffer too short for u64".to_string(),
431 });
432 }
433 let value = u64::from_le_bytes([
434 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
435 ]);
436 *buf = &buf[8..];
437 Ok(value)
438 }
439}
440
441impl Encode for i64 {
442 fn encode(&self, buf: &mut BytesMut) {
443 buf.extend_from_slice(&self.to_le_bytes());
444 }
445}
446
447impl Decode for i64 {
448 fn decode(buf: &mut &[u8]) -> Result<Self, EncodingError> {
449 if buf.len() < 8 {
450 return Err(EncodingError {
451 message: "Buffer too short for i64".to_string(),
452 });
453 }
454 let value = i64::from_le_bytes([
455 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
456 ]);
457 *buf = &buf[8..];
458 Ok(value)
459 }
460}
461
462impl Encode for f64 {
463 fn encode(&self, buf: &mut BytesMut) {
464 buf.extend_from_slice(&self.to_le_bytes());
465 }
466}
467
468impl Decode for f64 {
469 fn decode(buf: &mut &[u8]) -> Result<Self, EncodingError> {
470 if buf.len() < 8 {
471 return Err(EncodingError {
472 message: "Buffer too short for f64".to_string(),
473 });
474 }
475 let value = f64::from_le_bytes([
476 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
477 ]);
478 *buf = &buf[8..];
479 Ok(value)
480 }
481}
482
483impl Encode for FieldValue {
487 fn encode(&self, buf: &mut BytesMut) {
488 match self {
489 FieldValue::String(s) => {
490 buf.extend_from_slice(&[FieldType::String as u8]);
491 encode_utf8(s, buf);
492 }
493 FieldValue::Int64(v) => {
494 buf.extend_from_slice(&[FieldType::Int64 as u8]);
495 buf.extend_from_slice(&v.to_le_bytes());
496 }
497 FieldValue::Float64(v) => {
498 buf.extend_from_slice(&[FieldType::Float64 as u8]);
499 buf.extend_from_slice(&v.to_le_bytes());
500 }
501 FieldValue::Bool(v) => {
502 buf.extend_from_slice(&[FieldType::Bool as u8]);
503 buf.extend_from_slice(&[if *v { 1 } else { 0 }]);
504 }
505 FieldValue::Vector(v) => {
506 buf.extend_from_slice(&[FieldType::Vector as u8]);
507 encode_fixed_element_array(v, buf);
508 }
509 }
510 }
511}
512
513impl Decode for FieldValue {
518 fn decode(buf: &mut &[u8]) -> Result<Self, EncodingError> {
519 if buf.is_empty() {
520 return Err(EncodingError {
521 message: "Buffer too short for FieldValue type".to_string(),
522 });
523 }
524
525 let field_type = FieldType::from_byte(buf[0])?;
526 *buf = &buf[1..];
527
528 match field_type {
529 FieldType::String => {
530 let s = decode_utf8(buf)?;
531 Ok(FieldValue::String(s))
532 }
533 FieldType::Int64 => {
534 if buf.len() < 8 {
535 return Err(EncodingError {
536 message: "Buffer too short for Int64 value".to_string(),
537 });
538 }
539 let v = i64::from_le_bytes([
540 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
541 ]);
542 *buf = &buf[8..];
543 Ok(FieldValue::Int64(v))
544 }
545 FieldType::Float64 => {
546 if buf.len() < 8 {
547 return Err(EncodingError {
548 message: "Buffer too short for Float64 value".to_string(),
549 });
550 }
551 let v = f64::from_le_bytes([
552 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
553 ]);
554 *buf = &buf[8..];
555 Ok(FieldValue::Float64(v))
556 }
557 FieldType::Bool => {
558 if buf.is_empty() {
559 return Err(EncodingError {
560 message: "Buffer too short for Bool value".to_string(),
561 });
562 }
563 let v = buf[0] != 0;
564 *buf = &buf[1..];
565 Ok(FieldValue::Bool(v))
566 }
567 FieldType::Vector => Err(EncodingError {
568 message: "Vector type requires dimensions context; use decode_with_dimensions"
569 .to_string(),
570 }),
571 }
572 }
573}
574
575impl From<crate::model::AttributeValue> for FieldValue {
576 fn from(attr: crate::model::AttributeValue) -> Self {
577 match attr {
578 crate::model::AttributeValue::String(s) => FieldValue::String(s),
579 crate::model::AttributeValue::Int64(v) => FieldValue::Int64(v),
580 crate::model::AttributeValue::Float64(v) => FieldValue::Float64(v),
581 crate::model::AttributeValue::Bool(v) => FieldValue::Bool(v),
582 crate::model::AttributeValue::Vector(v) => FieldValue::Vector(v),
583 }
584 }
585}
586
587impl FieldValue {
588 pub fn decode_with_dimensions(
592 buf: &mut &[u8],
593 dimensions: usize,
594 ) -> Result<Self, EncodingError> {
595 if buf.is_empty() {
596 return Err(EncodingError {
597 message: "Buffer too short for FieldValue type".to_string(),
598 });
599 }
600
601 let field_type = FieldType::from_byte(buf[0])?;
602 *buf = &buf[1..];
603
604 match field_type {
605 FieldType::String => {
606 let s = decode_utf8(buf)?;
607 Ok(FieldValue::String(s))
608 }
609 FieldType::Int64 => {
610 if buf.len() < 8 {
611 return Err(EncodingError {
612 message: "Buffer too short for Int64 value".to_string(),
613 });
614 }
615 let v = i64::from_le_bytes([
616 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
617 ]);
618 *buf = &buf[8..];
619 Ok(FieldValue::Int64(v))
620 }
621 FieldType::Float64 => {
622 if buf.len() < 8 {
623 return Err(EncodingError {
624 message: "Buffer too short for Float64 value".to_string(),
625 });
626 }
627 let v = f64::from_le_bytes([
628 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
629 ]);
630 *buf = &buf[8..];
631 Ok(FieldValue::Float64(v))
632 }
633 FieldType::Bool => {
634 if buf.is_empty() {
635 return Err(EncodingError {
636 message: "Buffer too short for Bool value".to_string(),
637 });
638 }
639 let v = buf[0] != 0;
640 *buf = &buf[1..];
641 Ok(FieldValue::Bool(v))
642 }
643 FieldType::Vector => {
644 let expected_bytes = dimensions * 4;
645 if buf.len() < expected_bytes {
646 return Err(EncodingError {
647 message: format!(
648 "Buffer too short for Vector value: expected {} bytes, got {}",
649 expected_bytes,
650 buf.len()
651 ),
652 });
653 }
654 let mut vector_buf = &buf[..expected_bytes];
656 let vector: Vec<f32> = decode_fixed_element_array(&mut vector_buf, 4)?;
657 *buf = &buf[expected_bytes..];
658 Ok(FieldValue::Vector(vector))
659 }
660 }
661 }
662}
663
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn should_encode_and_decode_record_tag() {
        // given
        let record_tag = RecordType::CollectionMeta.tag();

        // when
        let encoded = record_tag.as_byte();
        let decoded = RecordTag::from_byte(encoded).unwrap();

        // then
        assert_eq!(decoded.as_byte(), record_tag.as_byte());
        assert_eq!(
            record_type_from_tag(decoded).unwrap(),
            RecordType::CollectionMeta
        );
    }

    #[test]
    fn should_convert_all_record_types() {
        // given
        let types = [
            RecordType::CollectionMeta,
            RecordType::Deletions,
            RecordType::CentroidChunk,
            RecordType::PostingList,
            RecordType::IdDictionary,
            RecordType::VectorData,
            RecordType::MetadataIndex,
            RecordType::SeqBlock,
            RecordType::CentroidStats,
        ];

        for record_type in types {
            // when
            let id = record_type.id();
            let recovered = RecordType::from_id(id).unwrap();

            // then
            assert_eq!(recovered, record_type);
        }
    }

    #[test]
    fn should_reject_unknown_record_type_id() {
        // given ids just outside the assigned range
        // then
        assert!(RecordType::from_id(0x00).is_err());
        assert!(RecordType::from_id(0x0A).is_err());
    }

    #[test]
    fn should_encode_and_decode_f32() {
        // given
        let value = 1.23456f32;
        let mut buf = BytesMut::new();

        // when
        value.encode(&mut buf);
        let mut slice = buf.as_ref();
        let decoded = f32::decode(&mut slice).unwrap();

        // then
        assert_eq!(decoded, value);
        assert!(slice.is_empty());
    }

    #[test]
    fn should_fail_to_decode_f32_from_short_buffer() {
        // given
        let bytes = [0u8; 3];
        let mut slice: &[u8] = &bytes;

        // when
        let result = f32::decode(&mut slice);

        // then the error is reported and nothing is consumed
        assert!(result.is_err());
        assert_eq!(slice.len(), 3);
    }

    #[test]
    fn should_encode_and_decode_u64() {
        // given
        let value = 0xDEADBEEF_CAFEBABE_u64;
        let mut buf = BytesMut::new();

        // when
        value.encode(&mut buf);
        let mut slice = buf.as_ref();
        let decoded = u64::decode(&mut slice).unwrap();

        // then
        assert_eq!(decoded, value);
        assert!(slice.is_empty());
    }

    #[test]
    fn should_encode_and_decode_fixed_element_array() {
        // given
        let values = vec![1.0f32, 2.0, 3.0, 4.0];
        let mut buf = BytesMut::new();

        // when
        encode_fixed_element_array(&values, &mut buf);
        let mut slice = buf.as_ref();
        let decoded: Vec<f32> = decode_fixed_element_array(&mut slice, 4).unwrap();

        // then
        assert_eq!(decoded, values);
        assert!(slice.is_empty());
    }

    #[test]
    fn should_reject_misaligned_fixed_element_array() {
        // given a buffer whose length is not a multiple of the element size
        let bytes = [0u8; 5];
        let mut slice: &[u8] = &bytes;

        // when
        let result: Result<Vec<f32>, _> = decode_fixed_element_array(&mut slice, 4);

        // then
        assert!(result.is_err());
    }

    #[test]
    fn should_encode_and_decode_scalar_field_values() {
        // given
        let values = [
            FieldValue::String("hello".to_string()),
            FieldValue::Int64(-42),
            FieldValue::Float64(2.5),
            FieldValue::Bool(true),
        ];

        for value in values {
            // when
            let mut buf = BytesMut::new();
            value.encode(&mut buf);
            let mut slice: &[u8] = &buf;
            let decoded = FieldValue::decode(&mut slice).unwrap();

            // then
            assert_eq!(decoded, value);
            assert!(slice.is_empty());
        }
    }

    #[test]
    fn should_encode_and_decode_vector_field_value_with_dimensions() {
        // given
        let value = FieldValue::Vector(vec![1.0, -2.5, 3.25]);
        let mut buf = BytesMut::new();
        value.encode(&mut buf);

        // when
        let mut slice: &[u8] = &buf;
        let decoded = FieldValue::decode_with_dimensions(&mut slice, 3).unwrap();

        // then
        assert_eq!(decoded, value);
        assert!(slice.is_empty());

        // and the plain decoder refuses vectors, which carry no length
        let mut plain: &[u8] = &buf;
        assert!(FieldValue::decode(&mut plain).is_err());
    }
}