/// Tag bit (bit 62, i.e. `2^62`) that distinguishes chunk ids from episode
/// ids inside the shared HNSW id space.
///
/// [`chunk_hnsw_id`] sets this bit on a chunk rowid; [`decode_hnsw_id`] tests
/// it to tell the two kinds apart. The value is positive, so tagged ids stay
/// non-negative `i64`s.
pub const HNSW_CHUNK_BIT: i64 = 2_i64.pow(62);
/// The kind of row an HNSW id refers to, as reported by [`decode_hnsw_id`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum HnswIdKind {
    /// The id has the chunk bit clear; it is an episode rowid as-is.
    Episode,
    /// The id has the chunk bit set; the rowid is the id with that bit cleared.
    Chunk,
}
/// Maps an episode rowid to its HNSW id.
///
/// Episode ids are stored untagged, so the returned value is the rowid itself.
///
/// # Panics
///
/// Only when debug assertions are enabled: panics if `episode_rowid` is
/// negative or already has [`HNSW_CHUNK_BIT`] set. Without debug assertions
/// no validation happens and the input is returned unchanged.
#[inline]
pub fn episode_hnsw_id(episode_rowid: i64) -> i64 {
    // The sign check must run first: some negative values (e.g. -1) also have
    // bit 62 set, and they should be reported as "non-negative" violations.
    debug_assert!(
        !episode_rowid.is_negative(),
        "episode rowid must be non-negative; got {episode_rowid}"
    );
    debug_assert!(
        (episode_rowid & HNSW_CHUNK_BIT) == 0,
        "episode rowid {episode_rowid} carries the chunk bit; SQLite shouldn't produce rowids ≥ 2^62"
    );

    episode_rowid
}
/// Maps a chunk rowid to its HNSW id by setting [`HNSW_CHUNK_BIT`].
///
/// The result never equals the input for a valid rowid, so a chunk and an
/// episode that share a rowid get distinct HNSW ids.
///
/// # Panics
///
/// Only when debug assertions are enabled: panics if `chunk_rowid` is
/// negative or already has [`HNSW_CHUNK_BIT`] set. Without debug assertions
/// no validation happens and the bit is OR-ed in regardless.
#[inline]
pub fn chunk_hnsw_id(chunk_rowid: i64) -> i64 {
    // The sign check must run first: some negative values (e.g. -1) also have
    // bit 62 set, and they should be reported as "non-negative" violations.
    debug_assert!(
        !chunk_rowid.is_negative(),
        "chunk rowid must be non-negative; got {chunk_rowid}"
    );
    debug_assert!(
        (chunk_rowid & HNSW_CHUNK_BIT) == 0,
        "chunk rowid {chunk_rowid} carries the chunk bit; SQLite shouldn't produce rowids ≥ 2^62"
    );

    HNSW_CHUNK_BIT | chunk_rowid
}
/// Splits an HNSW id into its kind and the underlying rowid.
///
/// Returns `(HnswIdKind::Chunk, id with the chunk bit cleared)` when
/// [`HNSW_CHUNK_BIT`] is set in `hnsw_id`, and `(HnswIdKind::Episode, hnsw_id)`
/// otherwise. The input is not validated; classification depends solely on
/// that one bit.
#[inline]
pub fn decode_hnsw_id(hnsw_id: i64) -> (HnswIdKind, i64) {
    let is_episode = (hnsw_id & HNSW_CHUNK_BIT) == 0;
    if is_episode {
        return (HnswIdKind::Episode, hnsw_id);
    }

    // The chunk bit is known to be set here, so XOR clears exactly that bit.
    (HnswIdKind::Chunk, hnsw_id ^ HNSW_CHUNK_BIT)
}
/// Unit tests for the HNSW id encoding helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Representative valid rowids shared by the round-trip tests.
    const SAMPLE_ROWIDS: [i64; 7] = [0, 1, 2, 42, 1_000, 1_000_000, 1_i64 << 30];

    #[test]
    fn episode_id_is_identity_for_low_rowids() {
        for rowid in SAMPLE_ROWIDS {
            let enc = episode_hnsw_id(rowid);
            assert_eq!(enc, rowid, "episode_hnsw_id must be identity for rowid={rowid}");
            let (kind, decoded) = decode_hnsw_id(enc);
            assert_eq!(kind, HnswIdKind::Episode);
            assert_eq!(decoded, rowid);
        }
    }

    #[test]
    fn chunk_id_sets_high_bit() {
        for rowid in SAMPLE_ROWIDS {
            let enc = chunk_hnsw_id(rowid);
            assert_ne!(enc, rowid, "chunk_hnsw_id must differ from input rowid={rowid}");
            assert_eq!(
                enc & HNSW_CHUNK_BIT,
                HNSW_CHUNK_BIT,
                "chunk_hnsw_id must set the chunk bit (rowid={rowid})"
            );
            let (kind, decoded) = decode_hnsw_id(enc);
            assert_eq!(kind, HnswIdKind::Chunk);
            assert_eq!(decoded, rowid);
        }
    }

    #[test]
    fn episode_and_chunk_with_same_rowid_have_distinct_hnsw_ids() {
        let rowid = 1i64;
        let ep = episode_hnsw_id(rowid);
        let chunk = chunk_hnsw_id(rowid);
        assert_ne!(ep, chunk, "episode and chunk with rowid=1 must encode differently");
        let (kind_ep, decoded_ep) = decode_hnsw_id(ep);
        let (kind_chunk, decoded_chunk) = decode_hnsw_id(chunk);
        assert_eq!(decoded_ep, rowid);
        assert_eq!(decoded_chunk, rowid);
        assert_eq!(kind_ep, HnswIdKind::Episode);
        assert_eq!(kind_chunk, HnswIdKind::Chunk);
    }

    #[test]
    fn decode_legacy_episode_id_zero_is_episode() {
        let (kind, decoded) = decode_hnsw_id(0);
        assert_eq!(kind, HnswIdKind::Episode);
        assert_eq!(decoded, 0);
    }

    #[test]
    fn chunk_bit_value_is_2_pow_62() {
        assert_eq!(HNSW_CHUNK_BIT, 1i64 << 62);
        assert!(HNSW_CHUNK_BIT > 0);
    }

    // The encoders validate their input with `debug_assert!`, which is compiled
    // out when debug assertions are disabled (e.g. `cargo test --release`).
    // The panic tests below are therefore only built when those assertions
    // exist; otherwise `#[should_panic]` would fail because nothing panics.

    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "non-negative")]
    fn episode_negative_rowid_panics_in_debug() {
        let _ = episode_hnsw_id(-1);
    }

    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "non-negative")]
    fn chunk_negative_rowid_panics_in_debug() {
        let _ = chunk_hnsw_id(-1);
    }

    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "chunk bit")]
    fn episode_rowid_with_chunk_bit_panics_in_debug() {
        let _ = episode_hnsw_id(HNSW_CHUNK_BIT | 1);
    }

    #[test]
    #[cfg(debug_assertions)]
    #[should_panic(expected = "chunk bit")]
    fn chunk_rowid_with_chunk_bit_panics_in_debug() {
        let _ = chunk_hnsw_id(HNSW_CHUNK_BIT | 1);
    }
}