Skip to main content

citadel_vector/
segment.rs

1//! The ANNSEG body format: a storage-agnostic byte encoding of everything a
2//! built [`AnnIndex`] holds EXCEPT the f32 vectors. The vectors are rehydrated
3//! at load time from the table rows themselves - the rows are the source of
4//! truth, and the rehydration scan doubles as the staleness proof (it computes
5//! the content fingerprint the storage layer compares against its header).
6//!
7//! Layout: a fixed sequence of REQUIRED sections, each
8//! `[tag u8][len u64 LE][payload][blake3(payload) 32B]`. Per-section hashes
9//! refuse corruption at the section that broke; the storage layer additionally
10//! hashes the whole body. All integers little-endian. Any layout change bumps
11//! the storage header's `format_version` - this module never reads old
12//! formats silently.
13//!
14//! `PointStore.vectors` order is PRISM-INTERNAL (cell-reordered): loaders must
15//! place each scanned row's vector at `inverse(id_map)[row_id]`, never in scan
16//! order - a scan-order fill silently corrupts every f32 rerank.
17
18use rustc_hash::FxHashMap;
19
20use crate::ann::AnnIndex;
21use crate::prism::{
22    BinaryStore, Cell, Graph, Metric, PartitionTree, PointStore, PrismConfig, PrismIndex, SQ8Store,
23};
24
/// Every reason this module refuses to decode a segment body or to finish an
/// index from decoded parts. Each variant is a hard refusal; no partial result
/// accompanies any of them.
#[derive(Debug, thiserror::Error)]
pub enum SegmentError {
    /// The input ended (or a length prefix pointed past the end) while reading
    /// the named section or field.
    #[error("segment truncated in {0}")]
    Truncated(&'static str),
    /// The next section's tag byte was not the one the fixed section order
    /// requires at this position.
    #[error("segment section tag mismatch: expected {expected}, got {got}")]
    BadTag { expected: u8, got: u8 },
    /// The named section's payload does not match its trailing BLAKE3 hash.
    #[error("segment section {0} failed its BLAKE3 check (corrupt)")]
    SectionHash(&'static str),
    /// The metric tag byte is not one of the values `metric_tag` produces.
    #[error("segment metric tag {0} unknown")]
    BadMetric(u8),
    /// The vector buffer handed to `SegmentParts::into_index` is not exactly
    /// `n * dim` floats long.
    #[error("rehydrated vectors length {got} != n*dim {expected}")]
    VectorLen { expected: usize, got: usize },
    /// The loader reported filling a number of vector slots other than `n`.
    #[error("rehydration filled {got} of {expected} vector slots")]
    RehydrationIncomplete { expected: usize, got: usize },
    /// Every section hash verified, but the decoded values contradict each
    /// other (for example, arrays that disagree on the row count).
    #[error("segment internal inconsistency: {0}")]
    Inconsistent(&'static str),
}
42
// Section tags. The values follow the order in which `encode` writes the
// sections and `decode` requires them: graph, local graph, SQ8 store, binary
// store, partition tree, ids, attributes.
const TAG_GRAPH: u8 = 1;
const TAG_LOCAL_GRAPH: u8 = 2;
const TAG_SQ8: u8 = 3;
const TAG_BINARY: u8 = 4;
const TAG_TREE: u8 = 5;
const TAG_IDS: u8 = 6;
const TAG_ATTRS: u8 = 7;
50
51/// BLAKE3 of the canonical little-endian encoding of EVERY [`PrismConfig`]
52/// field, domain-separated. The storage header pins this; a binary whose
53/// active config differs must refuse the segment (the graph was built for a
54/// different search geometry).
55pub fn prism_config_hash(cfg: &PrismConfig) -> [u8; 32] {
56    let mut h = blake3::Hasher::new();
57    h.update(b"citadel-annseg-config-v1");
58    for v in [
59        cfg.m_local as u64,
60        cfg.m_greedy as u64,
61        cfg.m_random as u64,
62        cfg.t as u64,
63        cfg.beam_width as u64,
64        cfg.binary_rerank as u64,
65    ] {
66        h.update(&v.to_le_bytes());
67    }
68    for v in [
69        cfg.alpha,
70        cfg.vamana_alpha,
71        cfg.sigma_high,
72        cfg.sigma_low,
73        cfg.beta,
74        cfg.epsilon,
75    ] {
76        h.update(&v.to_le_bytes());
77    }
78    h.update(&[metric_tag(cfg.metric)]);
79    *h.finalize().as_bytes()
80}
81
82pub fn metric_tag(m: Metric) -> u8 {
83    match m {
84        Metric::L2 => 0,
85        Metric::InnerProduct => 1,
86        Metric::Cosine => 2,
87    }
88}
89
90fn metric_from_tag(t: u8) -> Result<Metric, SegmentError> {
91    Ok(match t {
92        0 => Metric::L2,
93        1 => Metric::InnerProduct,
94        2 => Metric::Cosine,
95        other => return Err(SegmentError::BadMetric(other)),
96    })
97}
98
99/// Encode everything but the vectors. The output is the segment BODY; the
100/// storage layer wraps it in its header (fingerprint, config hash, counts).
101pub fn encode(index: &AnnIndex) -> Vec<u8> {
102    let p = index.prism();
103    let mut out = Vec::new();
104
105    section(&mut out, TAG_GRAPH, |b| encode_graph(b, &p.graph));
106    section(&mut out, TAG_LOCAL_GRAPH, |b| {
107        encode_graph(b, &p.local_graph)
108    });
109    section(&mut out, TAG_SQ8, |b| {
110        push_u64(b, p.sq8.dim() as u64);
111        push_slice_u8(b, p.sq8.codes());
112        push_slice_f32(b, p.sq8.mins());
113        push_slice_f32(b, p.sq8.scales());
114    });
115    section(&mut out, TAG_BINARY, |b| {
116        push_u64(b, p.binary.code_words() as u64);
117        push_u64(b, p.binary.block_size() as u64);
118        push_slice_u64(b, p.binary.codes());
119        push_slice_f32(b, p.binary.signs());
120    });
121    section(&mut out, TAG_TREE, |b| {
122        push_u64(b, p.tree.k as u64);
123        push_u64(b, p.tree.split_order.len() as u64);
124        for &s in &p.tree.split_order {
125            push_u64(b, s as u64);
126        }
127        push_u64(b, p.tree.cells.len() as u64);
128        for cell in &p.tree.cells {
129            push_slice_u32(b, &cell.values);
130            push_slice_u32(b, &cell.point_ids);
131        }
132    });
133    section(&mut out, TAG_IDS, |b| {
134        push_u64(b, index.snapshot_max);
135        b.push(metric_tag(index.metric));
136        b.extend_from_slice(&index.dim.to_le_bytes());
137        push_u64(b, u64::from(p.global_medoid));
138        push_slice_u32(b, &p.medoids);
139        push_slice_u32(b, &p.point_cell);
140        push_slice_u32(b, &p.original_ids);
141        push_slice_u64(b, index.id_map());
142    });
143    section(&mut out, TAG_ATTRS, |b| {
144        push_u64(b, p.store.attrs.len() as u64);
145        push_u64(b, p.store.len as u64);
146        for col in &p.store.attrs {
147            push_slice_u32(b, col);
148        }
149    });
150    out
151}
152
/// Everything a segment carries; vectors arrive separately via
/// [`SegmentParts::into_index`].
///
/// Produced by `decode` once every section hash has verified and the id
/// arrays and attribute columns agree on `n`.
pub struct SegmentParts {
    /// Decoded from the graph section.
    graph: Graph,
    /// Decoded from the local-graph section.
    local_graph: Graph,
    /// Rebuilt from the SQ8 section (codes, mins, scales, dim).
    sq8: SQ8Store,
    /// Rebuilt from the binary section (codes, code words, signs, block size).
    binary: BinaryStore,
    /// Rebuilt from the tree section (k, split order, cells).
    tree: PartitionTree,
    /// Passed through unchanged to `AnnIndex::from_parts`.
    snapshot_max: u64,
    /// Distance metric recorded in the ids section.
    metric: Metric,
    /// Vector dimensionality recorded in the ids section.
    dim: u16,
    /// Stored on disk as a u64, held as u32.
    global_medoid: u32,
    medoids: Vec<u32>,
    /// One entry per row (length checked against `n`).
    point_cell: Vec<u32>,
    /// One entry per row (length checked against `n`).
    original_ids: Vec<u32>,
    /// Row id at each internal slot: index = internal slot, value = row id
    /// (length checked against `n`).
    id_map: Vec<u64>,
    /// Attribute columns; every column has exactly `n` entries.
    attrs: Vec<Vec<u32>>,
    /// Row count recorded in the attrs section.
    n: usize,
}
172
173impl SegmentParts {
174    pub fn n(&self) -> usize {
175        self.n
176    }
177
178    pub fn dim(&self) -> u16 {
179        self.dim
180    }
181
182    pub fn metric(&self) -> Metric {
183        self.metric
184    }
185
186    pub fn id_map(&self) -> &[u64] {
187        &self.id_map
188    }
189
190    /// `row_id -> PRISM-internal slot`: the PERMUTATION rehydration must use.
191    pub fn internal_of_row(&self) -> FxHashMap<u64, u32> {
192        self.id_map
193            .iter()
194            .enumerate()
195            .map(|(internal, &row)| (row, internal as u32))
196            .collect()
197    }
198
199    /// Finish the index with vectors ALREADY PLACED in PRISM-internal order
200    /// (`filled` = how many slots the loader filled; must be exactly `n`).
201    pub fn into_index(self, vectors: Vec<f32>, filled: usize) -> Result<AnnIndex, SegmentError> {
202        if filled != self.n {
203            return Err(SegmentError::RehydrationIncomplete {
204                expected: self.n,
205                got: filled,
206            });
207        }
208        if vectors.len() != self.n * self.dim as usize {
209            return Err(SegmentError::VectorLen {
210                expected: self.n * self.dim as usize,
211                got: vectors.len(),
212            });
213        }
214        let store = PointStore::from_parts(vectors, self.dim as usize, self.attrs);
215        let prism = PrismIndex {
216            store,
217            tree: self.tree,
218            graph: self.graph,
219            local_graph: self.local_graph,
220            medoids: self.medoids,
221            global_medoid: self.global_medoid,
222            point_cell: self.point_cell,
223            original_ids: self.original_ids,
224            sq8: self.sq8,
225            binary: self.binary,
226            config: AnnIndex::active_config(self.metric),
227        };
228        Ok(AnnIndex::from_parts(
229            prism,
230            self.id_map,
231            self.snapshot_max,
232            self.metric,
233            self.dim,
234        ))
235    }
236}
237
238/// Decode a segment body. Every section's BLAKE3 must verify; any mismatch is
239/// a corruption refusal, never a partial result.
240pub fn decode(bytes: &[u8]) -> Result<SegmentParts, SegmentError> {
241    let mut r = Reader { buf: bytes, at: 0 };
242
243    let g = r.section(TAG_GRAPH, "graph")?;
244    let graph = decode_graph(&mut Reader { buf: g, at: 0 }, "graph")?;
245    let lg = r.section(TAG_LOCAL_GRAPH, "local_graph")?;
246    let local_graph = decode_graph(&mut Reader { buf: lg, at: 0 }, "local_graph")?;
247
248    let s = r.section(TAG_SQ8, "sq8")?;
249    let mut sr = Reader { buf: s, at: 0 };
250    let sq8_dim = sr.u64("sq8")? as usize;
251    let codes = sr.slice_u8("sq8")?.to_vec();
252    let mins = sr.slice_f32("sq8")?;
253    let scales = sr.slice_f32("sq8")?;
254    let sq8 = SQ8Store::from_parts(codes, mins, scales, sq8_dim);
255
256    let b = r.section(TAG_BINARY, "binary")?;
257    let mut br = Reader { buf: b, at: 0 };
258    let code_words = br.u64("binary")? as usize;
259    let block_size = br.u64("binary")? as usize;
260    let bcodes = br.slice_u64("binary")?;
261    let signs = br.slice_f32("binary")?;
262    let binary = BinaryStore::from_parts(bcodes, code_words, signs, block_size);
263
264    let t = r.section(TAG_TREE, "tree")?;
265    let mut tr = Reader { buf: t, at: 0 };
266    let k = tr.u64("tree")? as usize;
267    let so_len = tr.u64("tree")? as usize;
268    let mut split_order = Vec::with_capacity(so_len);
269    for _ in 0..so_len {
270        split_order.push(tr.u64("tree")? as usize);
271    }
272    let cells_len = tr.u64("tree")? as usize;
273    let mut cells = Vec::with_capacity(cells_len);
274    for _ in 0..cells_len {
275        let values = tr.slice_u32("tree")?;
276        let point_ids = tr.slice_u32("tree")?;
277        cells.push(Cell { values, point_ids });
278    }
279    let tree = PartitionTree {
280        cells,
281        split_order,
282        k,
283    };
284
285    let i = r.section(TAG_IDS, "ids")?;
286    let mut ir = Reader { buf: i, at: 0 };
287    let snapshot_max = ir.u64("ids")?;
288    let metric = metric_from_tag(ir.u8("ids")?)?;
289    let dim = ir.u16("ids")?;
290    let global_medoid = ir.u64("ids")? as u32;
291    let medoids = ir.slice_u32("ids")?;
292    let point_cell = ir.slice_u32("ids")?;
293    let original_ids = ir.slice_u32("ids")?;
294    let id_map = ir.slice_u64("ids")?;
295
296    let a = r.section(TAG_ATTRS, "attrs")?;
297    let mut ar = Reader { buf: a, at: 0 };
298    let attr_k = ar.u64("attrs")? as usize;
299    let n = ar.u64("attrs")? as usize;
300    let mut attrs = Vec::with_capacity(attr_k);
301    for _ in 0..attr_k {
302        let col = ar.slice_u32("attrs")?;
303        if col.len() != n {
304            return Err(SegmentError::Inconsistent("attr column length != n"));
305        }
306        attrs.push(col);
307    }
308
309    if id_map.len() != n || original_ids.len() != n || point_cell.len() != n {
310        return Err(SegmentError::Inconsistent("id arrays disagree on n"));
311    }
312    Ok(SegmentParts {
313        graph,
314        local_graph,
315        sq8,
316        binary,
317        tree,
318        snapshot_max,
319        metric,
320        dim,
321        global_medoid,
322        medoids,
323        point_cell,
324        original_ids,
325        id_map,
326        attrs,
327        n,
328    })
329}
330
331fn encode_graph(b: &mut Vec<u8>, g: &Graph) {
332    push_u64(b, g.n as u64);
333    push_slice_u32(b, &g.offsets);
334    push_slice_u32(b, &g.neighbors);
335}
336
337fn decode_graph(r: &mut Reader<'_>, what: &'static str) -> Result<Graph, SegmentError> {
338    let n = r.u64(what)? as usize;
339    let offsets = r.slice_u32(what)?;
340    let neighbors = r.slice_u32(what)?;
341    if offsets.len() != n + 1 {
342        return Err(SegmentError::Inconsistent("graph offsets length != n+1"));
343    }
344    Ok(Graph {
345        offsets,
346        neighbors,
347        n,
348    })
349}
350
351fn section(out: &mut Vec<u8>, tag: u8, fill: impl FnOnce(&mut Vec<u8>)) {
352    let mut payload = Vec::new();
353    fill(&mut payload);
354    out.push(tag);
355    push_u64(out, payload.len() as u64);
356    let hash = blake3::hash(&payload);
357    out.extend_from_slice(&payload);
358    out.extend_from_slice(hash.as_bytes());
359}
360
/// Append `v` to `b` as eight little-endian bytes.
fn push_u64(b: &mut Vec<u8>, v: u64) {
    let le_bytes = v.to_le_bytes();
    b.extend(le_bytes);
}
364
365fn push_slice_u8(b: &mut Vec<u8>, s: &[u8]) {
366    push_u64(b, s.len() as u64);
367    b.extend_from_slice(s);
368}
369
370fn push_slice_u32(b: &mut Vec<u8>, s: &[u32]) {
371    push_u64(b, s.len() as u64);
372    for &v in s {
373        b.extend_from_slice(&v.to_le_bytes());
374    }
375}
376
377fn push_slice_u64(b: &mut Vec<u8>, s: &[u64]) {
378    push_u64(b, s.len() as u64);
379    for &v in s {
380        b.extend_from_slice(&v.to_le_bytes());
381    }
382}
383
384fn push_slice_f32(b: &mut Vec<u8>, s: &[f32]) {
385    push_u64(b, s.len() as u64);
386    for &v in s {
387        b.extend_from_slice(&v.to_le_bytes());
388    }
389}
390
/// Forward-only cursor over a borrowed byte buffer. Returned slices borrow
/// from `buf` for `'a`, not from the reader itself.
struct Reader<'a> {
    /// The bytes being decoded.
    buf: &'a [u8],
    /// Offset of the next unread byte in `buf`.
    at: usize,
}
395
396impl<'a> Reader<'a> {
397    fn take(&mut self, n: usize, what: &'static str) -> Result<&'a [u8], SegmentError> {
398        let end = self
399            .at
400            .checked_add(n)
401            .filter(|&e| e <= self.buf.len())
402            .ok_or(SegmentError::Truncated(what))?;
403        let s = &self.buf[self.at..end];
404        self.at = end;
405        Ok(s)
406    }
407
408    fn u8(&mut self, what: &'static str) -> Result<u8, SegmentError> {
409        Ok(self.take(1, what)?[0])
410    }
411
412    fn u16(&mut self, what: &'static str) -> Result<u16, SegmentError> {
413        Ok(u16::from_le_bytes(self.take(2, what)?.try_into().unwrap()))
414    }
415
416    fn u64(&mut self, what: &'static str) -> Result<u64, SegmentError> {
417        Ok(u64::from_le_bytes(self.take(8, what)?.try_into().unwrap()))
418    }
419
420    /// One framed section: tag + length + payload + verified BLAKE3.
421    fn section(&mut self, tag: u8, what: &'static str) -> Result<&'a [u8], SegmentError> {
422        let got = self.u8(what)?;
423        if got != tag {
424            return Err(SegmentError::BadTag { expected: tag, got });
425        }
426        let len = self.u64(what)? as usize;
427        let payload = self.take(len, what)?;
428        let hash: [u8; 32] = self.take(32, what)?.try_into().unwrap();
429        if *blake3::hash(payload).as_bytes() != hash {
430            return Err(SegmentError::SectionHash(what));
431        }
432        Ok(payload)
433    }
434
435    fn slice_u8(&mut self, what: &'static str) -> Result<&'a [u8], SegmentError> {
436        let len = self.u64(what)? as usize;
437        self.take(len, what)
438    }
439
440    fn slice_u32(&mut self, what: &'static str) -> Result<Vec<u32>, SegmentError> {
441        let len = self.u64(what)? as usize;
442        let raw = self.take(
443            len.checked_mul(4).ok_or(SegmentError::Truncated(what))?,
444            what,
445        )?;
446        Ok(raw
447            .chunks_exact(4)
448            .map(|c| u32::from_le_bytes(c.try_into().unwrap()))
449            .collect())
450    }
451
452    fn slice_u64(&mut self, what: &'static str) -> Result<Vec<u64>, SegmentError> {
453        let len = self.u64(what)? as usize;
454        let raw = self.take(
455            len.checked_mul(8).ok_or(SegmentError::Truncated(what))?,
456            what,
457        )?;
458        Ok(raw
459            .chunks_exact(8)
460            .map(|c| u64::from_le_bytes(c.try_into().unwrap()))
461            .collect())
462    }
463
464    fn slice_f32(&mut self, what: &'static str) -> Result<Vec<f32>, SegmentError> {
465        let len = self.u64(what)? as usize;
466        let raw = self.take(
467            len.checked_mul(4).ok_or(SegmentError::Truncated(what))?,
468            what,
469        )?;
470        Ok(raw
471            .chunks_exact(4)
472            .map(|c| f32::from_le_bytes(c.try_into().unwrap()))
473            .collect())
474    }
475}
476
#[cfg(test)]
mod tests {
    use super::*;

    /// A small index with two attribute cells, NON-monotonic row ids (so
    /// id_map order != insertion order), and deterministic vectors.
    fn build_fixture() -> AnnIndex {
        let mut rows: Vec<(u64, Vec<f32>, Vec<u32>)> = Vec::new();
        for i in 0..200u64 {
            // Reverse-ish ids: external order differs from internal.
            let id = 1000 - i * 3;
            let v: Vec<f32> = (0..8).map(|d| ((i * 7 + d) % 23) as f32 * 0.5).collect();
            rows.push((id, v, vec![(i % 2) as u32]));
        }
        AnnIndex::build_with_attrs(rows, 1, Metric::Cosine, 8).expect("build fixture")
    }

    /// Rehydrate exactly as the storage loader will: by the id_map PERMUTATION.
    ///
    /// Rows are visited in ASCENDING ROW-ID order (a stand-in for a table
    /// scan), which is deliberately not the internal slot order; each row's
    /// vector is read from the original index's store and written to the slot
    /// that `internal_of_row` assigns it. Returns the filled buffer and the
    /// number of slots written.
    fn rehydrate(index: &AnnIndex, parts: &SegmentParts) -> (Vec<f32>, usize) {
        let inv = parts.internal_of_row();
        let dim = parts.dim() as usize;
        let mut vectors = vec![0.0f32; parts.n() * dim];
        let p = index.prism();

        // (row id, slot holding that row in the ORIGINAL index), sorted by row
        // id so the visit order really is the scan order.
        let mut scan: Vec<(u64, usize)> = index
            .id_map()
            .iter()
            .copied()
            .enumerate()
            .map(|(src, row)| (row, src))
            .collect();
        scan.sort_unstable();

        let mut filled = 0;
        for (row, src) in scan {
            let slot = inv[&row] as usize;
            let vector = &p.store.vectors[src * dim..(src + 1) * dim];
            vectors[slot * dim..(slot + 1) * dim].copy_from_slice(vector);
            filled += 1;
        }
        (vectors, filled)
    }

    #[test]
    fn roundtrip_preserves_filtered_search_results_exactly() {
        // Attribute-filtered search exercises the persisted tree + attrs +
        // dicts machinery, not just the graph.
        let index = build_fixture();
        let parts = decode(&encode(&index)).expect("decode");
        let (vectors, filled) = rehydrate(&index, &parts);
        let loaded = parts.into_index(vectors, filled).expect("into_index");
        let query: Vec<f32> = (0..8).map(|d| d as f32 * 0.7).collect();
        for code in [0u32, 1] {
            let filter = crate::prism::Filter::new(vec![(0, vec![code])]);
            let a = index.search_filtered(&query, 8, 64, &filter);
            let b = loaded.search_filtered(&query, 8, 64, &filter);
            assert_eq!(a, b, "filtered (attr0={code}) results identical");
            assert!(!a.is_empty(), "filter {code} matches half the fixture");
        }
    }

    #[test]
    fn roundtrip_holds_for_every_metric() {
        for metric in [Metric::L2, Metric::InnerProduct, Metric::Cosine] {
            let rows: Vec<(u64, Vec<f32>, Vec<u32>)> = (0..60u64)
                .map(|i| {
                    let v: Vec<f32> = (0..4).map(|d| ((i + d) % 13) as f32 - 6.0).collect();
                    (i * 2 + 1, v, vec![0])
                })
                .collect();
            let index = AnnIndex::build_with_attrs(rows, 1, metric, 4).expect("build");
            let parts = decode(&encode(&index)).expect("decode");
            assert_eq!(parts.metric(), metric, "metric tag survives");
            let (vectors, filled) = rehydrate(&index, &parts);
            let loaded = parts.into_index(vectors, filled).expect("into_index");
            let q = [1.0f32, -2.0, 3.0, 0.5];
            assert_eq!(index.search(&q, 5), loaded.search(&q, 5), "{metric:?}");
        }
    }

    #[test]
    fn single_row_index_roundtrips() {
        let index =
            AnnIndex::build_with_attrs(vec![(42, vec![1.0, 2.0], vec![0])], 1, Metric::L2, 2)
                .expect("build single");
        let parts = decode(&encode(&index)).expect("decode");
        assert_eq!(parts.n(), 1);
        let (vectors, filled) = rehydrate(&index, &parts);
        let loaded = parts.into_index(vectors, filled).expect("into_index");
        assert_eq!(loaded.search(&[1.0, 2.0], 1), vec![(42, 0.0)]);
    }

    #[test]
    fn truncation_at_every_byte_boundary_is_refused() {
        // Cutting the segment ANYWHERE must produce an error, never a panic or
        // a silently partial decode.
        let index = AnnIndex::build_with_attrs(
            (0..12u64)
                .map(|i| (i, vec![i as f32, 1.0], vec![0]))
                .collect(),
            1,
            Metric::L2,
            2,
        )
        .expect("build");
        let bytes = encode(&index);
        for cut in 0..bytes.len() {
            assert!(
                decode(&bytes[..cut]).is_err(),
                "truncation at {cut}/{} must refuse",
                bytes.len()
            );
        }
    }

    #[test]
    fn internal_of_row_is_a_complete_bijection() {
        let index = build_fixture();
        let parts = decode(&encode(&index)).expect("decode");
        let map = parts.internal_of_row();
        assert_eq!(map.len(), parts.n(), "every row maps");
        let mut slots: Vec<u32> = map.values().copied().collect();
        slots.sort_unstable();
        let expected: Vec<u32> = (0..parts.n() as u32).collect();
        assert_eq!(slots, expected, "slots form a permutation of 0..n");
    }

    #[test]
    fn wrong_vector_length_is_refused() {
        let index = build_fixture();
        let parts = decode(&encode(&index)).expect("decode");
        let n = parts.n();
        // One row short, sized from the decoded dim rather than a literal so
        // the test keeps meaning "n - 1 rows" if the fixture changes.
        let dim = parts.dim() as usize;
        let too_short = vec![0.0f32; (n - 1) * dim];
        assert!(matches!(
            parts.into_index(too_short, n),
            Err(SegmentError::VectorLen { .. })
        ));
    }

    #[test]
    fn roundtrip_preserves_search_results_exactly() {
        let index = build_fixture();
        let bytes = encode(&index);
        let parts = decode(&bytes).expect("decode");
        let (vectors, filled) = rehydrate(&index, &parts);
        let loaded = parts.into_index(vectors, filled).expect("into_index");

        let query: Vec<f32> = (0..8).map(|d| d as f32 * 0.3).collect();
        let a = index.search(&query, 10);
        let b = loaded.search(&query, 10);
        assert_eq!(a, b, "loaded index must answer EXACTLY like the original");
        assert_eq!(index.snapshot_max, loaded.snapshot_max);
        assert_eq!(index.id_map(), loaded.id_map());
    }

    #[test]
    fn every_section_corruption_is_refused() {
        let index = build_fixture();
        let bytes = encode(&index);
        // Flip one byte inside each section's payload region and expect a
        // refusal each time (walk the framing to find payload offsets).
        let mut at = 0usize;
        let mut payload_spots = Vec::new();
        while at < bytes.len() {
            // Frame: 1 tag byte, 8 length bytes, payload, 32 hash bytes.
            let len = u64::from_le_bytes(bytes[at + 1..at + 9].try_into().unwrap()) as usize;
            payload_spots.push(at + 9 + len / 2);
            at += 1 + 8 + len + 32;
        }
        assert_eq!(payload_spots.len(), 7, "all seven sections present");
        for spot in payload_spots {
            let mut corrupt = bytes.clone();
            corrupt[spot] ^= 0xFF;
            assert!(
                matches!(decode(&corrupt), Err(SegmentError::SectionHash(_))),
                "corruption at {spot} must be refused"
            );
        }
    }

    #[test]
    fn incomplete_rehydration_is_refused() {
        let index = build_fixture();
        let parts = decode(&encode(&index)).expect("decode");
        let dim = parts.dim() as usize;
        let n = parts.n();
        let vectors = vec![0.0f32; n * dim];
        assert!(matches!(
            parts.into_index(vectors, n - 1),
            Err(SegmentError::RehydrationIncomplete { .. })
        ));
    }

    #[test]
    fn config_hash_is_sensitive_to_every_field() {
        let base = AnnIndex::active_config(Metric::Cosine);
        let h0 = prism_config_hash(&base);
        // One variant per field, each differing from `base` in that field only.
        let variants: Vec<PrismConfig> = vec![
            PrismConfig {
                m_local: base.m_local + 1,
                ..base.clone()
            },
            PrismConfig {
                m_greedy: base.m_greedy + 1,
                ..base.clone()
            },
            PrismConfig {
                m_random: base.m_random + 2,
                ..base.clone()
            },
            PrismConfig {
                t: base.t + 1,
                ..base.clone()
            },
            PrismConfig {
                alpha: base.alpha + 0.5,
                ..base.clone()
            },
            PrismConfig {
                vamana_alpha: base.vamana_alpha + 0.5,
                ..base.clone()
            },
            PrismConfig {
                beam_width: base.beam_width + 1,
                ..base.clone()
            },
            PrismConfig {
                metric: Metric::L2,
                ..base.clone()
            },
            PrismConfig {
                sigma_high: base.sigma_high + 0.25,
                ..base.clone()
            },
            PrismConfig {
                sigma_low: base.sigma_low + 0.25,
                ..base.clone()
            },
            PrismConfig {
                beta: base.beta + 0.5,
                ..base.clone()
            },
            PrismConfig {
                epsilon: base.epsilon + 0.5,
                ..base.clone()
            },
            PrismConfig {
                binary_rerank: base.binary_rerank + 1,
                ..base.clone()
            },
        ];
        for (i, v) in variants.iter().enumerate() {
            assert_ne!(
                prism_config_hash(v),
                h0,
                "config field {i} must perturb the hash"
            );
        }
    }
}