const DOC_ID_SHIFT: u32 = 1;
const TF_SHIFT: u32 = 33;
const PFOR_OFFSET_SHIFT: u32 = 1;
const PFOR_OFFSET_BITS: u32 = 42;
const PFOR_LENGTH_SHIFT: u32 = PFOR_OFFSET_SHIFT + PFOR_OFFSET_BITS;
const PFOR_LENGTH_BITS: u32 = 21;
const PFOR_OFFSET_MAX: u64 = (1u64 << PFOR_OFFSET_BITS) - 1;
pub(crate) const PFOR_LENGTH_MAX: u32 = (1u32 << PFOR_LENGTH_BITS) - 1;
pub(crate) const INLINE_TF_MAX: u32 = (1 << 30) - 1;
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub(crate) enum FstValue {
Pfor {
metadata_offset: u64,
postings_length: u32,
},
Inline { doc_id: u32, tf: u32 },
}
impl FstValue {
#[inline]
pub(crate) fn unpack(packed: u64) -> Self {
if packed & 1 == 0 {
Self::Pfor {
metadata_offset: (packed >> PFOR_OFFSET_SHIFT) & PFOR_OFFSET_MAX,
postings_length: ((packed >> PFOR_LENGTH_SHIFT) as u32) & PFOR_LENGTH_MAX,
}
} else {
let doc_id = (packed >> DOC_ID_SHIFT) as u32;
let tf = ((packed >> TF_SHIFT) as u32) & INLINE_TF_MAX;
Self::Inline { doc_id, tf }
}
}
#[inline]
pub(crate) fn pack_pfor(metadata_offset: u64, postings_length: u32) -> u64 {
assert!(
metadata_offset <= PFOR_OFFSET_MAX,
"metadata_offset {metadata_offset} overflows the {PFOR_OFFSET_BITS}-bit PFOR slot"
);
assert!(
postings_length <= PFOR_LENGTH_MAX,
"postings_length {postings_length} overflows the {PFOR_LENGTH_BITS}-bit PFOR slot"
);
(metadata_offset << PFOR_OFFSET_SHIFT) | ((postings_length as u64) << PFOR_LENGTH_SHIFT)
}
#[inline]
pub(crate) fn pack_inline(doc_id: u32, tf: u32) -> u64 {
assert!(
tf <= INLINE_TF_MAX,
"tf {tf} overflows the inline 30-bit slot (max {INLINE_TF_MAX})"
);
1 | ((doc_id as u64) << DOC_ID_SHIFT) | ((tf as u64) << TF_SHIFT)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn pfor_round_trip() {
for &(offset, len) in &[
(0u64, 20u32),
(1, 128),
(20, 4096),
(1 << 20, 1 << 16),
((1u64 << 34) - 1, (1 << 20) - 1),
(1u64 << 34, PFOR_LENGTH_MAX),
] {
let packed = FstValue::pack_pfor(offset, len);
assert_eq!(packed & 1, 0, "PFOR form must have low bit clear");
assert_eq!(
FstValue::unpack(packed),
FstValue::Pfor {
metadata_offset: offset,
postings_length: len
}
);
}
}
#[test]
fn inline_round_trip() {
let cases = [
(0u32, 0u32),
(1, 1),
(500_000, 7),
(u32::MAX, INLINE_TF_MAX),
];
for &(doc_id, tf) in &cases {
let packed = FstValue::pack_inline(doc_id, tf);
assert_eq!(packed & 1, 1, "inline form must have low bit set");
assert_eq!(FstValue::unpack(packed), FstValue::Inline { doc_id, tf });
}
}
#[test]
#[should_panic(expected = "overflows the inline 30-bit slot")]
fn inline_tf_overflow_panics() {
let _ = FstValue::pack_inline(0, INLINE_TF_MAX + 1);
}
#[test]
fn flag_bit_distinguishes_forms() {
let pfor = FstValue::pack_pfor(42, 128);
let inline = FstValue::pack_inline(42, 7);
assert_ne!(pfor & 1, inline & 1);
}
}