Skip to main content

ailake_file/
footer.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2// Binary layout for the AI-Lake footer extension.
3// See docs/specs/FILE_FORMAT.md for field-by-field spec.
4
5use ailake_core::{AilakeError, AilakeResult, VectorMetric, VectorPrecision};
6
/// Magic bytes (`AILK` in ASCII) written at the start of the header and at the
/// end of the trailer.
pub const AILAKE_MAGIC: [u8; 4] = [b'A', b'I', b'L', b'K'];
/// The format version this module writes and the only one the header parser accepts.
pub const AILAKE_FORMAT_VERSION: u16 = 1;
/// Trailer size in bytes: footer_offset (8) + footer_len (8) + format_version (2)
/// + flags (2) + magic (4).
pub const TRAILER_SIZE: usize = 8 + 8 + 2 + 2 + 4;
/// Header size in bytes: 56 bytes of magic, fields and reserved padding, followed
/// by 8 reserved bytes.
pub const HEADER_SIZE: usize = 56 + 8;
11
/// `flags` bit 0: clear means an HNSW index (the default), set means an IVF-PQ index.
pub const FLAG_INDEX_IVF_PQ: u16 = 1 << 0;
/// `flags` bit 1: set means a RaBitQ index.
pub const FLAG_INDEX_RABITQ: u16 = 1 << 1;
15
/// One-byte precision code stored at byte 12 of the AI-Lake header.
///
/// The explicit discriminants are the on-disk values: the header writer casts the
/// variant with `as u8` and the parser maps the byte back through `TryFrom<u8>`,
/// so existing variants must never be renumbered.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Precision {
    F32 = 0,
    F16 = 1,
    I8 = 2,
    Binary = 3,
}
24
/// One-byte distance-metric code stored at byte 13 of the AI-Lake header.
///
/// The explicit discriminants are the on-disk values: the header writer casts the
/// variant with `as u8` and the parser maps the byte back through `TryFrom<u8>`,
/// so existing variants must never be renumbered.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DistanceMetric {
    Cosine = 0,
    Euclidean = 1,
    DotProduct = 2,
    NormalizedCosine = 3,
}
33
34impl From<VectorPrecision> for Precision {
35    fn from(p: VectorPrecision) -> Self {
36        match p {
37            VectorPrecision::F32 => Precision::F32,
38            VectorPrecision::F16 => Precision::F16,
39            VectorPrecision::I8 => Precision::I8,
40            VectorPrecision::Binary => Precision::Binary,
41        }
42    }
43}
44
45impl From<VectorMetric> for DistanceMetric {
46    fn from(m: VectorMetric) -> Self {
47        match m {
48            VectorMetric::Cosine => DistanceMetric::Cosine,
49            VectorMetric::Euclidean => DistanceMetric::Euclidean,
50            VectorMetric::DotProduct => DistanceMetric::DotProduct,
51            VectorMetric::NormalizedCosine => DistanceMetric::NormalizedCosine,
52        }
53    }
54}
55
56impl TryFrom<u8> for Precision {
57    type Error = AilakeError;
58    fn try_from(v: u8) -> AilakeResult<Self> {
59        match v {
60            0 => Ok(Precision::F32),
61            1 => Ok(Precision::F16),
62            2 => Ok(Precision::I8),
63            3 => Ok(Precision::Binary),
64            _ => Err(AilakeError::UnsupportedFormatVersion(v as u16)),
65        }
66    }
67}
68
69impl TryFrom<u8> for DistanceMetric {
70    type Error = AilakeError;
71    fn try_from(v: u8) -> AilakeResult<Self> {
72        match v {
73            0 => Ok(DistanceMetric::Cosine),
74            1 => Ok(DistanceMetric::Euclidean),
75            2 => Ok(DistanceMetric::DotProduct),
76            3 => Ok(DistanceMetric::NormalizedCosine),
77            _ => Err(AilakeError::UnsupportedFormatVersion(v as u16)),
78        }
79    }
80}
81
82/// 64-byte header at the start of the AI-Lake footer extension.
83#[derive(Debug, Clone)]
84pub struct AilakeHeader {
85    pub format_version: u16,
86    pub flags: u16,
87    pub dim: u32,
88    pub precision: Precision,
89    pub distance_metric: DistanceMetric,
90    pub record_count: u64,
91    pub centroid_offset: u64,
92    pub centroid_len: u64,
93    pub hnsw_offset: u64,
94    pub hnsw_len: u64,
95}
96
97impl AilakeHeader {
98    pub fn to_bytes(&self) -> [u8; HEADER_SIZE] {
99        let mut b = [0u8; HEADER_SIZE];
100        b[0..4].copy_from_slice(&AILAKE_MAGIC);
101        b[4..6].copy_from_slice(&self.format_version.to_le_bytes());
102        b[6..8].copy_from_slice(&self.flags.to_le_bytes());
103        b[8..12].copy_from_slice(&self.dim.to_le_bytes());
104        b[12] = self.precision as u8;
105        b[13] = self.distance_metric as u8;
106        // b[14..16] reserved = 0
107        b[16..24].copy_from_slice(&self.record_count.to_le_bytes());
108        b[24..32].copy_from_slice(&self.centroid_offset.to_le_bytes());
109        b[32..40].copy_from_slice(&self.centroid_len.to_le_bytes());
110        b[40..48].copy_from_slice(&self.hnsw_offset.to_le_bytes());
111        b[48..56].copy_from_slice(&self.hnsw_len.to_le_bytes());
112        // b[56..64] reserved = 0
113        b
114    }
115
116    pub fn from_bytes(b: &[u8; HEADER_SIZE]) -> AilakeResult<Self> {
117        if b[0..4] != AILAKE_MAGIC {
118            return Err(AilakeError::InvalidAilakeMagic(b[0..4].try_into().unwrap()));
119        }
120        let format_version = u16::from_le_bytes(b[4..6].try_into().unwrap());
121        if format_version != AILAKE_FORMAT_VERSION {
122            return Err(AilakeError::UnsupportedFormatVersion(format_version));
123        }
124        Ok(AilakeHeader {
125            format_version,
126            flags: u16::from_le_bytes(b[6..8].try_into().unwrap()),
127            dim: u32::from_le_bytes(b[8..12].try_into().unwrap()),
128            precision: Precision::try_from(b[12])?,
129            distance_metric: DistanceMetric::try_from(b[13])?,
130            record_count: u64::from_le_bytes(b[16..24].try_into().unwrap()),
131            centroid_offset: u64::from_le_bytes(b[24..32].try_into().unwrap()),
132            centroid_len: u64::from_le_bytes(b[32..40].try_into().unwrap()),
133            hnsw_offset: u64::from_le_bytes(b[40..48].try_into().unwrap()),
134            hnsw_len: u64::from_le_bytes(b[48..56].try_into().unwrap()),
135        })
136    }
137}
138
/// 24-byte trailer — the last bytes of every AI-Lake file.
///
/// Byte ranges below are positions inside the serialised trailer, as written by
/// `to_bytes`; every field is little-endian. Bytes 20..24 hold the magic.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AilakeTrailer {
    /// Bytes 0..8.
    pub footer_offset: u64,
    /// Bytes 8..16.
    pub footer_len: u64,
    /// Bytes 16..18. Not validated when the trailer is parsed.
    pub format_version: u16,
    /// Bytes 18..20.
    pub flags: u16,
}
147
148impl AilakeTrailer {
149    pub fn to_bytes(&self) -> [u8; TRAILER_SIZE] {
150        let mut b = [0u8; TRAILER_SIZE];
151        b[0..8].copy_from_slice(&self.footer_offset.to_le_bytes());
152        b[8..16].copy_from_slice(&self.footer_len.to_le_bytes());
153        b[16..18].copy_from_slice(&self.format_version.to_le_bytes());
154        b[18..20].copy_from_slice(&self.flags.to_le_bytes());
155        b[20..24].copy_from_slice(&AILAKE_MAGIC);
156        b
157    }
158
159    pub fn from_bytes(b: &[u8; TRAILER_SIZE]) -> AilakeResult<Self> {
160        if b[20..24] != AILAKE_MAGIC {
161            return Err(AilakeError::InvalidAilakeMagic(
162                b[20..24].try_into().unwrap(),
163            ));
164        }
165        Ok(AilakeTrailer {
166            footer_offset: u64::from_le_bytes(b[0..8].try_into().unwrap()),
167            footer_len: u64::from_le_bytes(b[8..16].try_into().unwrap()),
168            format_version: u16::from_le_bytes(b[16..18].try_into().unwrap()),
169            flags: u16::from_le_bytes(b[18..20].try_into().unwrap()),
170        })
171    }
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Header whose fields all hold distinct, non-default values, so a field
    /// written to or read from the wrong offset shows up in the assertions.
    fn sample_header() -> AilakeHeader {
        AilakeHeader {
            format_version: AILAKE_FORMAT_VERSION,
            flags: FLAG_INDEX_IVF_PQ,
            dim: 1536,
            precision: Precision::F16,
            distance_metric: DistanceMetric::DotProduct,
            record_count: 50_000,
            centroid_offset: 64,
            centroid_len: 1536 * 4 + 4,
            hnsw_offset: 64 + 1536 * 4 + 4,
            hnsw_len: 4_194_304,
        }
    }

    #[test]
    fn header_roundtrip() {
        let h = sample_header();
        let bytes = h.to_bytes();
        let h2 = AilakeHeader::from_bytes(&bytes).unwrap();
        // Compared field by field so the test does not require `PartialEq` on the struct.
        assert_eq!(h2.format_version, h.format_version);
        assert_eq!(h2.flags, h.flags);
        assert_eq!(h2.dim, 1536);
        assert_eq!(h2.precision, Precision::F16);
        assert_eq!(h2.distance_metric, DistanceMetric::DotProduct);
        assert_eq!(h2.record_count, 50_000);
        assert_eq!(h2.centroid_offset, h.centroid_offset);
        assert_eq!(h2.centroid_len, h.centroid_len);
        assert_eq!(h2.hnsw_offset, h.hnsw_offset);
        assert_eq!(h2.hnsw_len, h.hnsw_len);
    }

    #[test]
    fn header_layout_is_stable() {
        let bytes = sample_header().to_bytes();
        assert_eq!(&bytes[0..4], b"AILK");
        assert_eq!(&bytes[4..6], &AILAKE_FORMAT_VERSION.to_le_bytes()[..]);
        assert_eq!(bytes[12], Precision::F16 as u8);
        assert_eq!(bytes[13], DistanceMetric::DotProduct as u8);
        // Reserved regions must be written as zero.
        assert!(bytes[14..16].iter().all(|&byte| byte == 0));
        assert!(bytes[56..64].iter().all(|&byte| byte == 0));
    }

    #[test]
    fn trailer_roundtrip() {
        let t = AilakeTrailer {
            footer_offset: 12_582_912,
            footer_len: 4_194_304,
            format_version: 1,
            flags: FLAG_INDEX_RABITQ,
        };
        let bytes = t.to_bytes();
        let t2 = AilakeTrailer::from_bytes(&bytes).unwrap();
        assert_eq!(t2.footer_offset, 12_582_912);
        assert_eq!(t2.footer_len, 4_194_304);
        assert_eq!(t2.format_version, 1);
        assert_eq!(t2.flags, FLAG_INDEX_RABITQ);
        assert_eq!(&bytes[20..24], b"AILK");
    }

    #[test]
    fn invalid_magic_rejected() {
        let mut bytes = [0u8; HEADER_SIZE];
        bytes[0..4].copy_from_slice(b"BLAH");
        assert!(matches!(
            AilakeHeader::from_bytes(&bytes),
            Err(AilakeError::InvalidAilakeMagic(_))
        ));
    }

    #[test]
    fn trailer_invalid_magic_rejected() {
        let bytes = [0u8; TRAILER_SIZE];
        assert!(matches!(
            AilakeTrailer::from_bytes(&bytes),
            Err(AilakeError::InvalidAilakeMagic(_))
        ));
    }

    #[test]
    fn unsupported_version_rejected() {
        let mut h = sample_header();
        h.format_version = 2;
        let bytes = h.to_bytes();
        assert!(matches!(
            AilakeHeader::from_bytes(&bytes),
            Err(AilakeError::UnsupportedFormatVersion(2))
        ));
    }

    #[test]
    fn unknown_enum_codes_rejected() {
        let mut bad_precision = sample_header().to_bytes();
        bad_precision[12] = 4;
        assert!(AilakeHeader::from_bytes(&bad_precision).is_err());

        let mut bad_metric = sample_header().to_bytes();
        bad_metric[13] = 9;
        assert!(AilakeHeader::from_bytes(&bad_metric).is_err());
    }
}