lnmp_embedding/
decoder.rs

1use crate::vector::{EmbeddingType, Vector};
2use byteorder::{LittleEndian, ReadBytesExt};
3use std::io::Cursor;
4
5pub struct Decoder;
6
7impl Decoder {
8    pub fn decode(data: &[u8]) -> Result<Vector, std::io::Error> {
9        let mut rdr = Cursor::new(data);
10
11        let dim = rdr.read_u16::<LittleEndian>()?;
12        let dtype_byte = rdr.read_u8()?;
13        let _similarity = rdr.read_u8()?; // Read and ignore for now
14
15        let dtype = match dtype_byte {
16            0x01 => EmbeddingType::F32,
17            0x02 => EmbeddingType::F16,
18            0x03 => EmbeddingType::I8,
19            0x04 => EmbeddingType::U8,
20            0x05 => EmbeddingType::Binary,
21            _ => {
22                return Err(std::io::Error::new(
23                    std::io::ErrorKind::InvalidData,
24                    "Invalid dtype",
25                ))
26            }
27        };
28
29        let pos = rdr.position() as usize;
30        let vector_data = data[pos..].to_vec();
31
32        // Validation could happen here (check length matches dim * type_size)
33
34        Ok(Vector {
35            dim,
36            dtype,
37            data: vector_data,
38        })
39    }
40}