Skip to main content

citadel_vector/
segment.rs

//! The ANNSEG body format: a storage-agnostic byte encoding of everything a
//! built [`AnnIndex`] holds EXCEPT the f32 vectors. The vectors are rehydrated
//! at load time from the table rows themselves - the rows are the source of
//! truth, and the rehydration scan doubles as the staleness proof (it computes
//! the content fingerprint the storage layer compares against its header).
//!
//! Layout: a fixed sequence of REQUIRED sections, each framed as
//! `[tag u8][len u64 LE][payload][blake3(payload) 32B]`. Per-section hashes
//! make a corrupt segment fail at the exact section that broke; the storage
//! layer additionally hashes the whole body. All integers are little-endian.
//! Any layout change bumps the storage header's `format_version` - this module
//! never reads old formats silently.
//!
//! `PointStore.vectors` order is PRISM-INTERNAL (cell-reordered): loaders must
//! place each scanned row's vector at `inverse(id_map)[row_id]`, never in scan
//! order - a scan-order fill silently corrupts every f32 rerank.
17
18use rustc_hash::FxHashMap;
19
20use crate::ann::AnnIndex;
21use crate::prism::{
22    BinaryStore, Cell, Graph, Metric, PartitionTree, PointStore, PrismConfig, PrismIndex, SQ8Store,
23};
24
25#[derive(Debug, thiserror::Error)]
26pub enum SegmentError {
27    #[error("segment truncated in {0}")]
28    Truncated(&'static str),
29    #[error("segment section tag mismatch: expected {expected}, got {got}")]
30    BadTag { expected: u8, got: u8 },
31    #[error("segment section {0} failed its BLAKE3 check (corrupt)")]
32    SectionHash(&'static str),
33    #[error("segment metric tag {0} unknown")]
34    BadMetric(u8),
35    #[error("rehydrated vectors length {got} != n*dim {expected}")]
36    VectorLen { expected: usize, got: usize },
37    #[error("rehydration filled {got} of {expected} vector slots")]
38    RehydrationIncomplete { expected: usize, got: usize },
39    #[error("segment internal inconsistency: {0}")]
40    Inconsistent(&'static str),
41}
42
// Section tags. Each value is the first byte of one framed section; `encode`
// writes the sections in ascending tag order and `decode` requires the same
// order.

/// Section holding the index's `graph`.
const TAG_GRAPH: u8 = 1;
/// Section holding the index's `local_graph`.
const TAG_LOCAL_GRAPH: u8 = 2;
/// Section holding the SQ8 store (dim, codes, mins, scales).
const TAG_SQ8: u8 = 3;
/// Section holding the binary store (code words, block size, codes, signs).
const TAG_BINARY: u8 = 4;
/// Section holding the partition tree (k, split order, cells).
const TAG_TREE: u8 = 5;
/// Section holding snapshot/metric/dim plus the medoid and id arrays.
const TAG_IDS: u8 = 6;
/// Section holding the attribute columns and the point count.
const TAG_ATTRS: u8 = 7;
50
51/// BLAKE3 of the canonical little-endian encoding of EVERY [`PrismConfig`]
52/// field, domain-separated. The storage header pins this; a binary whose
53/// active config differs must refuse the segment (the graph was built for a
54/// different search geometry). The domain string carries the search-geometry
55/// version: bump it whenever build or search semantics change shape.
56pub fn prism_config_hash(cfg: &PrismConfig) -> [u8; 32] {
57    let mut h = blake3::Hasher::new();
58    h.update(b"citadel-annseg-config-v2");
59    for v in [
60        cfg.m_local as u64,
61        cfg.m_greedy as u64,
62        cfg.m_random as u64,
63        cfg.t as u64,
64        cfg.beam_width as u64,
65        cfg.binary_rerank as u64,
66    ] {
67        h.update(&v.to_le_bytes());
68    }
69    for v in [
70        cfg.alpha,
71        cfg.vamana_alpha,
72        cfg.sigma_high,
73        cfg.sigma_low,
74        cfg.beta,
75        cfg.epsilon,
76    ] {
77        h.update(&v.to_le_bytes());
78    }
79    h.update(&[metric_tag(cfg.metric)]);
80    *h.finalize().as_bytes()
81}
82
83pub fn metric_tag(m: Metric) -> u8 {
84    match m {
85        Metric::L2 => 0,
86        Metric::InnerProduct => 1,
87        Metric::Cosine => 2,
88    }
89}
90
91fn metric_from_tag(t: u8) -> Result<Metric, SegmentError> {
92    Ok(match t {
93        0 => Metric::L2,
94        1 => Metric::InnerProduct,
95        2 => Metric::Cosine,
96        other => return Err(SegmentError::BadMetric(other)),
97    })
98}
99
100/// Encode everything but the vectors. The output is the segment BODY; the
101/// storage layer wraps it in its header (fingerprint, config hash, counts).
102pub fn encode(index: &AnnIndex) -> Vec<u8> {
103    let p = index.prism();
104    let mut out = Vec::new();
105
106    section(&mut out, TAG_GRAPH, |b| encode_graph(b, &p.graph));
107    section(&mut out, TAG_LOCAL_GRAPH, |b| {
108        encode_graph(b, &p.local_graph)
109    });
110    section(&mut out, TAG_SQ8, |b| {
111        push_u64(b, p.sq8.dim() as u64);
112        push_slice_u8(b, p.sq8.codes());
113        push_slice_f32(b, p.sq8.mins());
114        push_slice_f32(b, p.sq8.scales());
115    });
116    section(&mut out, TAG_BINARY, |b| {
117        push_u64(b, p.binary.code_words() as u64);
118        push_u64(b, p.binary.block_size() as u64);
119        push_slice_u64(b, p.binary.codes());
120        push_slice_f32(b, p.binary.signs());
121    });
122    section(&mut out, TAG_TREE, |b| {
123        push_u64(b, p.tree.k as u64);
124        push_u64(b, p.tree.split_order.len() as u64);
125        for &s in &p.tree.split_order {
126            push_u64(b, s as u64);
127        }
128        push_u64(b, p.tree.cells.len() as u64);
129        for cell in &p.tree.cells {
130            push_slice_u32(b, &cell.values);
131            push_slice_u32(b, &cell.point_ids);
132        }
133    });
134    section(&mut out, TAG_IDS, |b| {
135        push_u64(b, index.snapshot_max);
136        b.push(metric_tag(index.metric));
137        b.extend_from_slice(&index.dim.to_le_bytes());
138        push_u64(b, u64::from(p.global_medoid));
139        push_slice_u32(b, &p.medoids);
140        push_slice_u32(b, &p.point_cell);
141        push_slice_u32(b, &p.original_ids);
142        push_slice_u64(b, index.id_map());
143    });
144    section(&mut out, TAG_ATTRS, |b| {
145        push_u64(b, p.store.attrs.len() as u64);
146        push_u64(b, p.store.len as u64);
147        for col in &p.store.attrs {
148            push_slice_u32(b, col);
149        }
150    });
151    out
152}
153
154/// Everything a segment carries; vectors arrive separately via
155/// [`SegmentParts::into_index`].
156pub struct SegmentParts {
157    graph: Graph,
158    local_graph: Graph,
159    sq8: SQ8Store,
160    binary: BinaryStore,
161    tree: PartitionTree,
162    snapshot_max: u64,
163    metric: Metric,
164    dim: u16,
165    global_medoid: u32,
166    medoids: Vec<u32>,
167    point_cell: Vec<u32>,
168    original_ids: Vec<u32>,
169    id_map: Vec<u64>,
170    attrs: Vec<Vec<u32>>,
171    n: usize,
172}
173
174impl SegmentParts {
175    pub fn n(&self) -> usize {
176        self.n
177    }
178
179    pub fn dim(&self) -> u16 {
180        self.dim
181    }
182
183    pub fn metric(&self) -> Metric {
184        self.metric
185    }
186
187    pub fn id_map(&self) -> &[u64] {
188        &self.id_map
189    }
190
191    /// `row_id -> PRISM-internal slot`: the PERMUTATION rehydration must use.
192    pub fn internal_of_row(&self) -> FxHashMap<u64, u32> {
193        self.id_map
194            .iter()
195            .enumerate()
196            .map(|(internal, &row)| (row, internal as u32))
197            .collect()
198    }
199
200    /// Finish the index with vectors ALREADY PLACED in PRISM-internal order
201    /// (`filled` = how many slots the loader filled; must be exactly `n`).
202    pub fn into_index(
203        self,
204        mut vectors: Vec<f32>,
205        filled: usize,
206    ) -> Result<AnnIndex, SegmentError> {
207        if filled != self.n {
208            return Err(SegmentError::RehydrationIncomplete {
209                expected: self.n,
210                got: filled,
211            });
212        }
213        if vectors.len() != self.n * self.dim as usize {
214            return Err(SegmentError::VectorLen {
215                expected: self.n * self.dim as usize,
216                got: vectors.len(),
217            });
218        }
219        // Cosine stores are build-normalized; rehydrated row vectors are raw.
220        // Re-applying the same normalization keeps loaded-segment search
221        // bit-identical to the freshly built index.
222        if self.metric == Metric::Cosine {
223            crate::prism::distance::normalize_rows(&mut vectors, self.dim as usize);
224        }
225        let store = PointStore::from_parts(vectors, self.dim as usize, self.attrs);
226        let prism = PrismIndex {
227            store,
228            tree: self.tree,
229            graph: self.graph,
230            local_graph: self.local_graph,
231            medoids: self.medoids,
232            global_medoid: self.global_medoid,
233            point_cell: self.point_cell,
234            original_ids: self.original_ids,
235            sq8: self.sq8,
236            binary: self.binary,
237            config: AnnIndex::active_config(self.metric),
238        };
239        Ok(AnnIndex::from_parts(
240            prism,
241            self.id_map,
242            self.snapshot_max,
243            self.metric,
244            self.dim,
245        ))
246    }
247}
248
249/// Decode a segment body. Every section's BLAKE3 must verify; any mismatch is
250/// a corruption refusal, never a partial result.
251pub fn decode(bytes: &[u8]) -> Result<SegmentParts, SegmentError> {
252    let mut r = Reader { buf: bytes, at: 0 };
253
254    let g = r.section(TAG_GRAPH, "graph")?;
255    let graph = decode_graph(&mut Reader { buf: g, at: 0 }, "graph")?;
256    let lg = r.section(TAG_LOCAL_GRAPH, "local_graph")?;
257    let local_graph = decode_graph(&mut Reader { buf: lg, at: 0 }, "local_graph")?;
258
259    let s = r.section(TAG_SQ8, "sq8")?;
260    let mut sr = Reader { buf: s, at: 0 };
261    let sq8_dim = sr.u64("sq8")? as usize;
262    let codes = sr.slice_u8("sq8")?.to_vec();
263    let mins = sr.slice_f32("sq8")?;
264    let scales = sr.slice_f32("sq8")?;
265    let sq8 = SQ8Store::from_parts(codes, mins, scales, sq8_dim);
266
267    let b = r.section(TAG_BINARY, "binary")?;
268    let mut br = Reader { buf: b, at: 0 };
269    let code_words = br.u64("binary")? as usize;
270    let block_size = br.u64("binary")? as usize;
271    let bcodes = br.slice_u64("binary")?;
272    let signs = br.slice_f32("binary")?;
273    let binary = BinaryStore::from_parts(bcodes, code_words, signs, block_size);
274
275    let t = r.section(TAG_TREE, "tree")?;
276    let mut tr = Reader { buf: t, at: 0 };
277    let k = tr.u64("tree")? as usize;
278    let so_len = tr.u64("tree")? as usize;
279    let mut split_order = Vec::with_capacity(so_len);
280    for _ in 0..so_len {
281        split_order.push(tr.u64("tree")? as usize);
282    }
283    let cells_len = tr.u64("tree")? as usize;
284    let mut cells = Vec::with_capacity(cells_len);
285    for _ in 0..cells_len {
286        let values = tr.slice_u32("tree")?;
287        let point_ids = tr.slice_u32("tree")?;
288        cells.push(Cell { values, point_ids });
289    }
290    let tree = PartitionTree {
291        cells,
292        split_order,
293        k,
294    };
295
296    let i = r.section(TAG_IDS, "ids")?;
297    let mut ir = Reader { buf: i, at: 0 };
298    let snapshot_max = ir.u64("ids")?;
299    let metric = metric_from_tag(ir.u8("ids")?)?;
300    let dim = ir.u16("ids")?;
301    let global_medoid = ir.u64("ids")? as u32;
302    let medoids = ir.slice_u32("ids")?;
303    let point_cell = ir.slice_u32("ids")?;
304    let original_ids = ir.slice_u32("ids")?;
305    let id_map = ir.slice_u64("ids")?;
306
307    let a = r.section(TAG_ATTRS, "attrs")?;
308    let mut ar = Reader { buf: a, at: 0 };
309    let attr_k = ar.u64("attrs")? as usize;
310    let n = ar.u64("attrs")? as usize;
311    let mut attrs = Vec::with_capacity(attr_k);
312    for _ in 0..attr_k {
313        let col = ar.slice_u32("attrs")?;
314        if col.len() != n {
315            return Err(SegmentError::Inconsistent("attr column length != n"));
316        }
317        attrs.push(col);
318    }
319
320    if id_map.len() != n || original_ids.len() != n || point_cell.len() != n {
321        return Err(SegmentError::Inconsistent("id arrays disagree on n"));
322    }
323    Ok(SegmentParts {
324        graph,
325        local_graph,
326        sq8,
327        binary,
328        tree,
329        snapshot_max,
330        metric,
331        dim,
332        global_medoid,
333        medoids,
334        point_cell,
335        original_ids,
336        id_map,
337        attrs,
338        n,
339    })
340}
341
342fn encode_graph(b: &mut Vec<u8>, g: &Graph) {
343    push_u64(b, g.n as u64);
344    push_slice_u32(b, &g.offsets);
345    push_slice_u32(b, &g.neighbors);
346}
347
348fn decode_graph(r: &mut Reader<'_>, what: &'static str) -> Result<Graph, SegmentError> {
349    let n = r.u64(what)? as usize;
350    let offsets = r.slice_u32(what)?;
351    let neighbors = r.slice_u32(what)?;
352    if offsets.len() != n + 1 {
353        return Err(SegmentError::Inconsistent("graph offsets length != n+1"));
354    }
355    Ok(Graph {
356        offsets,
357        neighbors,
358        n,
359    })
360}
361
362fn section(out: &mut Vec<u8>, tag: u8, fill: impl FnOnce(&mut Vec<u8>)) {
363    let mut payload = Vec::new();
364    fill(&mut payload);
365    out.push(tag);
366    push_u64(out, payload.len() as u64);
367    let hash = blake3::hash(&payload);
368    out.extend_from_slice(&payload);
369    out.extend_from_slice(hash.as_bytes());
370}
371
/// Append `v` to `b` as 8 little-endian bytes.
fn push_u64(b: &mut Vec<u8>, v: u64) {
    b.extend(v.to_le_bytes());
}
375
/// Append a byte slice to `b`: element count as u64 LE, then the raw bytes.
fn push_slice_u8(b: &mut Vec<u8>, s: &[u8]) {
    let count = s.len() as u64;
    b.extend_from_slice(&count.to_le_bytes());
    b.extend(s.iter().copied());
}
380
/// Append a `u32` slice to `b`: element count as u64 LE, then every element
/// as 4 little-endian bytes.
fn push_slice_u32(b: &mut Vec<u8>, s: &[u32]) {
    b.extend_from_slice(&(s.len() as u64).to_le_bytes());
    b.extend(s.iter().flat_map(|v| v.to_le_bytes()));
}
387
/// Append a `u64` slice to `b`: element count as u64 LE, then every element
/// as 8 little-endian bytes.
fn push_slice_u64(b: &mut Vec<u8>, s: &[u64]) {
    b.extend_from_slice(&(s.len() as u64).to_le_bytes());
    b.extend(s.iter().flat_map(|v| v.to_le_bytes()));
}
394
/// Append an `f32` slice to `b`: element count as u64 LE, then every element
/// as its 4 little-endian bytes.
fn push_slice_f32(b: &mut Vec<u8>, s: &[f32]) {
    b.extend_from_slice(&(s.len() as u64).to_le_bytes());
    b.extend(s.iter().flat_map(|v| v.to_le_bytes()));
}
401
/// A forward-only cursor over a borrowed byte buffer.
struct Reader<'a> {
    /// The bytes being decoded.
    buf: &'a [u8],
    /// Offset into `buf` of the next unread byte.
    at: usize,
}
406
407impl<'a> Reader<'a> {
408    fn take(&mut self, n: usize, what: &'static str) -> Result<&'a [u8], SegmentError> {
409        let end = self
410            .at
411            .checked_add(n)
412            .filter(|&e| e <= self.buf.len())
413            .ok_or(SegmentError::Truncated(what))?;
414        let s = &self.buf[self.at..end];
415        self.at = end;
416        Ok(s)
417    }
418
419    fn u8(&mut self, what: &'static str) -> Result<u8, SegmentError> {
420        Ok(self.take(1, what)?[0])
421    }
422
423    fn u16(&mut self, what: &'static str) -> Result<u16, SegmentError> {
424        Ok(u16::from_le_bytes(self.take(2, what)?.try_into().unwrap()))
425    }
426
427    fn u64(&mut self, what: &'static str) -> Result<u64, SegmentError> {
428        Ok(u64::from_le_bytes(self.take(8, what)?.try_into().unwrap()))
429    }
430
431    /// One framed section: tag + length + payload + verified BLAKE3.
432    fn section(&mut self, tag: u8, what: &'static str) -> Result<&'a [u8], SegmentError> {
433        let got = self.u8(what)?;
434        if got != tag {
435            return Err(SegmentError::BadTag { expected: tag, got });
436        }
437        let len = self.u64(what)? as usize;
438        let payload = self.take(len, what)?;
439        let hash: [u8; 32] = self.take(32, what)?.try_into().unwrap();
440        if *blake3::hash(payload).as_bytes() != hash {
441            return Err(SegmentError::SectionHash(what));
442        }
443        Ok(payload)
444    }
445
446    fn slice_u8(&mut self, what: &'static str) -> Result<&'a [u8], SegmentError> {
447        let len = self.u64(what)? as usize;
448        self.take(len, what)
449    }
450
451    fn slice_u32(&mut self, what: &'static str) -> Result<Vec<u32>, SegmentError> {
452        let len = self.u64(what)? as usize;
453        let raw = self.take(
454            len.checked_mul(4).ok_or(SegmentError::Truncated(what))?,
455            what,
456        )?;
457        Ok(raw
458            .chunks_exact(4)
459            .map(|c| u32::from_le_bytes(c.try_into().unwrap()))
460            .collect())
461    }
462
463    fn slice_u64(&mut self, what: &'static str) -> Result<Vec<u64>, SegmentError> {
464        let len = self.u64(what)? as usize;
465        let raw = self.take(
466            len.checked_mul(8).ok_or(SegmentError::Truncated(what))?,
467            what,
468        )?;
469        Ok(raw
470            .chunks_exact(8)
471            .map(|c| u64::from_le_bytes(c.try_into().unwrap()))
472            .collect())
473    }
474
475    fn slice_f32(&mut self, what: &'static str) -> Result<Vec<f32>, SegmentError> {
476        let len = self.u64(what)? as usize;
477        let raw = self.take(
478            len.checked_mul(4).ok_or(SegmentError::Truncated(what))?,
479            what,
480        )?;
481        Ok(raw
482            .chunks_exact(4)
483            .map(|c| f32::from_le_bytes(c.try_into().unwrap()))
484            .collect())
485    }
486}
487
#[cfg(test)]
mod tests {
    use super::*;

    /// One fixture row as a table scan yields it: `(row_id, raw vector, attrs)`.
    type Row = (u64, Vec<f32>, Vec<u32>);

    /// Deterministic fixture: 200 rows, one attribute alternating between two
    /// values, and DESCENDING row ids so that external order differs from
    /// insertion order. Vectors are RAW (not normalized).
    fn fixture_rows() -> Vec<Row> {
        let mut rows = Vec::with_capacity(200);
        for i in 0..200u64 {
            let id = 1000 - i * 3;
            let vector = (0..8)
                .map(|d| ((i * 7 + d) % 23) as f32 * 0.5)
                .collect::<Vec<f32>>();
            rows.push((id, vector, vec![(i % 2) as u32]));
        }
        rows
    }

    /// The fixture rows built into a cosine index with one attribute column.
    fn build_fixture() -> AnnIndex {
        let rows = fixture_rows();
        AnnIndex::build_with_attrs(rows, 1, Metric::Cosine, 8).expect("build fixture")
    }

    /// Rehydrate the way a storage loader would: copy each RAW row vector
    /// into the slot the id_map PERMUTATION assigns it, and report how many
    /// rows were placed.
    fn rehydrate(rows: &[Row], parts: &SegmentParts) -> (Vec<f32>, usize) {
        let slot_of = parts.internal_of_row();
        let dim = parts.dim() as usize;
        let mut placed = vec![0.0f32; parts.n() * dim];
        for (row_id, vector, _attrs) in rows {
            let start = slot_of[row_id] as usize * dim;
            placed[start..start + dim].copy_from_slice(vector);
        }
        (placed, rows.len())
    }

    #[test]
    fn roundtrip_preserves_filtered_search_results_exactly() {
        // Filtered search goes through the persisted tree and attribute
        // columns as well as the graph.
        let original = build_fixture();
        let parts = decode(&encode(&original)).expect("decode");
        let (vectors, filled) = rehydrate(&fixture_rows(), &parts);
        let reloaded = parts.into_index(vectors, filled).expect("into_index");

        let query = (0..8).map(|d| d as f32 * 0.7).collect::<Vec<f32>>();
        for code in 0u32..=1 {
            let filter = crate::prism::Filter::new(vec![(0, vec![code])]);
            let before = original.search_filtered(&query, 8, 64, &filter);
            let after = reloaded.search_filtered(&query, 8, 64, &filter);
            assert_eq!(before, after, "filtered (attr0={code}) results identical");
            assert!(!before.is_empty(), "filter {code} matches half the fixture");
        }
    }

    #[test]
    fn roundtrip_holds_for_every_metric() {
        let mut rows: Vec<Row> = Vec::with_capacity(60);
        for i in 0..60u64 {
            let vector = (0..4)
                .map(|d| ((i + d) % 13) as f32 - 6.0)
                .collect::<Vec<f32>>();
            rows.push((i * 2 + 1, vector, vec![0]));
        }
        let q = [1.0f32, -2.0, 3.0, 0.5];

        for metric in [Metric::L2, Metric::InnerProduct, Metric::Cosine] {
            let index = AnnIndex::build_with_attrs(rows.clone(), 1, metric, 4).expect("build");
            let parts = decode(&encode(&index)).expect("decode");
            assert_eq!(parts.metric(), metric, "metric tag survives");
            let (vectors, filled) = rehydrate(&rows, &parts);
            let loaded = parts.into_index(vectors, filled).expect("into_index");
            assert_eq!(index.search(&q, 5), loaded.search(&q, 5), "{metric:?}");
        }
    }

    #[test]
    fn single_row_index_roundtrips() {
        let rows: Vec<Row> = vec![(42u64, vec![1.0f32, 2.0], vec![0u32])];
        let index =
            AnnIndex::build_with_attrs(rows.clone(), 1, Metric::L2, 2).expect("build single");
        let parts = decode(&encode(&index)).expect("decode");
        assert_eq!(parts.n(), 1);

        let (vectors, filled) = rehydrate(&rows, &parts);
        let loaded = parts.into_index(vectors, filled).expect("into_index");
        assert_eq!(loaded.search(&[1.0, 2.0], 1), vec![(42, 0.0)]);
    }

    #[test]
    fn truncation_at_every_byte_boundary_is_refused() {
        // Every strict prefix of a valid body must decode to an error -
        // never a panic, never a silently partial result.
        let rows: Vec<Row> = (0..12u64)
            .map(|i| (i, vec![i as f32, 1.0], vec![0]))
            .collect();
        let index = AnnIndex::build_with_attrs(rows, 1, Metric::L2, 2).expect("build");
        let bytes = encode(&index);

        (0..bytes.len()).for_each(|cut| {
            assert!(
                decode(&bytes[..cut]).is_err(),
                "truncation at {cut}/{} must refuse",
                bytes.len()
            );
        });
    }

    #[test]
    fn internal_of_row_is_a_complete_bijection() {
        let parts = decode(&encode(&build_fixture())).expect("decode");
        let map = parts.internal_of_row();
        assert_eq!(map.len(), parts.n(), "every row maps");

        let mut slots = map.values().copied().collect::<Vec<u32>>();
        slots.sort_unstable();
        let expected = (0..parts.n() as u32).collect::<Vec<u32>>();
        assert_eq!(slots, expected, "slots form a permutation of 0..n");
    }

    #[test]
    fn wrong_vector_length_is_refused() {
        let parts = decode(&encode(&build_fixture())).expect("decode");
        let n = parts.n();
        // One full row short of n * dim (the fixture dim is 8).
        let too_short = vec![0.0f32; (n - 1) * 8];
        let outcome = parts.into_index(too_short, n);
        assert!(matches!(outcome, Err(SegmentError::VectorLen { .. })));
    }

    #[test]
    fn roundtrip_preserves_search_results_exactly() {
        let index = build_fixture();
        let parts = decode(&encode(&index)).expect("decode");
        let (vectors, filled) = rehydrate(&fixture_rows(), &parts);
        let loaded = parts.into_index(vectors, filled).expect("into_index");

        let query = (0..8).map(|d| d as f32 * 0.3).collect::<Vec<f32>>();
        let a = index.search(&query, 10);
        let b = loaded.search(&query, 10);
        assert_eq!(a, b, "loaded index must answer EXACTLY like the original");
        assert_eq!(index.snapshot_max, loaded.snapshot_max);
        assert_eq!(index.id_map(), loaded.id_map());
    }

    #[test]
    fn every_section_corruption_is_refused() {
        let bytes = encode(&build_fixture());

        // Walk the framing (1 tag byte, 8 length bytes, payload, 32 hash
        // bytes) and pick one offset in the middle of every payload.
        let mut payload_spots = Vec::new();
        let mut cursor = 0usize;
        while cursor < bytes.len() {
            let len_bytes: [u8; 8] = bytes[cursor + 1..cursor + 9].try_into().unwrap();
            let len = u64::from_le_bytes(len_bytes) as usize;
            payload_spots.push(cursor + 9 + len / 2);
            cursor += 1 + 8 + len + 32;
        }
        assert_eq!(payload_spots.len(), 7, "all seven sections present");

        // Flipping the byte at each spot must trip that section's hash.
        for spot in payload_spots {
            let mut corrupt = bytes.clone();
            corrupt[spot] ^= 0xFF;
            assert!(
                matches!(decode(&corrupt), Err(SegmentError::SectionHash(_))),
                "corruption at {spot} must be refused"
            );
        }
    }

    #[test]
    fn incomplete_rehydration_is_refused() {
        let parts = decode(&encode(&build_fixture())).expect("decode");
        let n = parts.n();
        let vectors = vec![0.0f32; n * parts.dim() as usize];
        let outcome = parts.into_index(vectors, n - 1);
        assert!(matches!(
            outcome,
            Err(SegmentError::RehydrationIncomplete { .. })
        ));
    }

    #[test]
    fn config_hash_is_sensitive_to_every_field() {
        /// A copy of `base` with exactly one field changed by `edit`.
        fn tweak(base: &PrismConfig, edit: impl FnOnce(&mut PrismConfig)) -> PrismConfig {
            let mut cfg = base.clone();
            edit(&mut cfg);
            cfg
        }

        let base = AnnIndex::active_config(Metric::Cosine);
        let h0 = prism_config_hash(&base);

        // One variant per field; the index in the failure message refers to
        // the position in this list.
        let variants: Vec<PrismConfig> = vec![
            tweak(&base, |c| c.m_local += 1),
            tweak(&base, |c| c.m_greedy += 1),
            tweak(&base, |c| c.m_random += 2),
            tweak(&base, |c| c.t += 1),
            tweak(&base, |c| c.alpha += 0.5),
            tweak(&base, |c| c.vamana_alpha += 0.5),
            tweak(&base, |c| c.beam_width += 1),
            tweak(&base, |c| c.metric = Metric::L2),
            tweak(&base, |c| c.sigma_high += 0.25),
            tweak(&base, |c| c.sigma_low += 0.25),
            tweak(&base, |c| c.beta += 0.5),
            tweak(&base, |c| c.epsilon += 0.5),
            tweak(&base, |c| c.binary_rerank += 1),
        ];
        for (i, v) in variants.iter().enumerate() {
            assert_ne!(
                prism_config_hash(v),
                h0,
                "config field {i} must perturb the hash"
            );
        }
    }
}