nodedb_vector/collection/segment.rs
// SPDX-License-Identifier: Apache-2.0

//! Segment types for the VectorCollection lifecycle.
4
5use crate::collection::tier::StorageTier;
6use crate::flat::FlatIndex;
7use crate::hnsw::{HnswIndex, HnswParams};
8use crate::mmap_segment::MmapVectorSegment;
9use crate::quantize::pq::PqCodec;
10use crate::quantize::sq8::Sq8Codec;
11
/// Default number of vectors at which the growing segment is sealed.
///
/// Sizing example: 65,536 vectors × 768 dims × 4 bytes (FP32) = 192 MiB of raw
/// vector data per segment; other dimensionalities scale proportionally.
pub const DEFAULT_SEAL_THRESHOLD: usize = 65_536;
15
16/// Request to build an HNSW index from sealed vectors (sent to builder thread).
17pub struct BuildRequest {
18 pub key: String,
19 pub segment_id: u32,
20 pub vectors: Vec<Vec<f32>>,
21 pub dim: usize,
22 pub params: HnswParams,
23}
24
25/// Completed HNSW build (sent back from builder thread).
26pub struct BuildComplete {
27 pub key: String,
28 pub segment_id: u32,
29 pub index: HnswIndex,
30}
31
32/// A sealed segment whose HNSW index is being built in background.
33pub struct BuildingSegment {
34 /// Flat index for brute-force search while HNSW is building.
35 pub flat: FlatIndex,
36 /// Base ID offset: vectors have global IDs [base_id .. base_id + count).
37 pub base_id: u32,
38 /// Unique segment identifier (for matching with BuildComplete).
39 pub segment_id: u32,
40}
41
42/// A sealed segment with a completed HNSW index.
43pub struct SealedSegment {
44 /// Built HNSW index (immutable after construction).
45 pub index: HnswIndex,
46 /// Base ID offset.
47 pub base_id: u32,
48 /// Optional SQ8 quantized vectors for accelerated traversal.
49 pub sq8: Option<(Sq8Codec, Vec<u8>)>,
50 /// Optional PQ-compressed codes (for HnswPq-configured indexes).
51 pub pq: Option<(PqCodec, Vec<u8>)>,
52 /// Storage tier: L0Ram = FP32 in HNSW nodes, L1Nvme = FP32 in mmap file.
53 pub tier: StorageTier,
54 /// mmap-backed vector segment for L1 NVMe tier.
55 pub mmap_vectors: Option<MmapVectorSegment>,
56}