pub mod checksum;
pub mod footer;
/// Three-byte project prefix `INF`. The unit tests in this file assert that the
/// FTS and vector section magics all begin with these bytes.
pub const PROJECT_MAGIC: &[u8; 3] = b"INF";
/// Current format version as a `major.minor.patch` string. It is parsed by
/// `Version::is_compatible_with_current` to obtain the current major version.
pub const FORMAT_VERSION: &str = "1.0.0";
/// Byte width of a CRC value.
/// NOTE(review): 4 bytes suggests CRC-32; the checksum code is not visible here — confirm.
pub const CRC_BYTES: usize = 4;
/// Constants for the FTS section (presumably "full-text search", given the
/// BM25-related names — confirm).
///
/// Only values are defined here. Names ending in `_OFF` are presumably byte
/// offsets and names ending in `_BYTES` / `_SIZE` byte counts; the code that
/// reads and writes the section is not part of this module — confirm there.
pub mod fts {
    /// Eight-byte section magic, `INFFTS01`.
    pub const MAGIC: &[u8; 8] = b"INFFTS01";
    /// Length of [`MAGIC`] in bytes.
    pub const MAGIC_BYTES: usize = 8;
    /// Section version number.
    pub const VERSION: u32 = 1;

    /// Size of the section header; every offset in [`hdr`] is below this value.
    pub const HEADER_SIZE: usize = 48;
    /// Width of a `u32` in bytes.
    pub const U32_BYTES: usize = 4;
    /// Width of a `u64` in bytes.
    pub const U64_BYTES: usize = 8;

    /// Fixed-point scale factor.
    /// NOTE(review): name suggests it applies to the average document length — confirm.
    pub const AVGDL_FIXED_POINT_SCALE: f32 = 1000.0;
    /// Fixed-point scale factor.
    /// NOTE(review): name suggests it applies to a per-block maximum BM25 score — confirm.
    pub const BLOCK_MAX_BM25_FIXED_POINT_SCALE: f32 = 1000.0;

    /// Header field offsets. `VERSION_OFF` equals [`MAGIC_BYTES`], so the
    /// fields presumably follow the magic directly — confirm with the writer.
    pub mod hdr {
        pub const VERSION_OFF: usize = 8;
        pub const N_COLUMNS_OFF: usize = 12;
        pub const N_DOCS_OFF: usize = 16;
        pub const N_TERMS_OFF: usize = 20;
        pub const FST_OFFSET_OFF: usize = 24;
        pub const POSTINGS_OFFSET_OFF: usize = 32;
        pub const DOC_LENGTHS_DIR_OFF: usize = 40;
    }

    /// Field offsets within a term-metadata record.
    /// NOTE(review): the gap between offsets 0 and 12 implies at least one
    /// field that has no named constant here — confirm the full record layout.
    pub mod term_meta {
        pub const DF_OFF: usize = 0;
        pub const POSTINGS_LENGTH_OFF: usize = 12;
        pub const NUM_BLOCKS_OFF: usize = 16;
    }

    /// Field offsets within a skip entry.
    pub mod skip_entry {
        pub const LAST_DOC_ID_OFF: usize = 0;
        pub const BLOCK_OFFSET_OFF: usize = 4;
        pub const MAX_BM25_OFF: usize = 8;
    }
}
/// Constants for the vector section.
///
/// Only values are defined here. Names ending in `_OFF` are presumably byte
/// offsets and names ending in `_BYTES` / `_SIZE` byte counts; the code that
/// reads and writes the section is not part of this module — confirm there.
pub mod vec {
    /// Eight-byte magic of the outer section, `INFVEC01`.
    pub const OUTER_MAGIC: &[u8; 8] = b"INFVEC01";
    /// Eight-byte magic of a subsection, `INFVECC1`.
    pub const SUB_MAGIC: &[u8; 8] = b"INFVECC1";
    /// Length of either magic in bytes.
    pub const MAGIC_BYTES: usize = 8;

    /// Version number of the outer section.
    pub const VERSION: u32 = 1;
    /// Version number of a subsection; versioned separately from [`VERSION`].
    pub const SUBSECTION_VERSION: u32 = 2;

    /// Width of a `u32` in bytes.
    pub const U32_BYTES: usize = 4;
    /// Width of a `u64` in bytes.
    pub const U64_BYTES: usize = 8;
    /// Width of a document id in bytes.
    pub const DOC_ID_BYTES: usize = 4;

    /// Size of the outer header; every offset in [`outer_hdr`] is below this value.
    pub const OUTER_HEADER_SIZE: usize = 32;
    /// Size of one directory entry; every offset in [`dir_entry`] is below this value.
    pub const DIR_ENTRY_SIZE: usize = 64;
    /// Size of a subsection header; every offset in [`sub_hdr`] is below this value.
    pub const SUB_HEADER_SIZE: usize = 56;

    // Metric identifiers.
    // NOTE(review): names suggest squared L2, cosine and negated dot product —
    // confirm against the distance implementations.
    pub const METRIC_ID_L2SQ: u32 = 0;
    pub const METRIC_ID_COSINE: u32 = 1;
    pub const METRIC_ID_NEGDOT: u32 = 2;

    /// Size of one cluster-index entry in bytes.
    pub const CLUSTER_IDX_ENTRY_BYTES: usize = 8;
    /// NOTE(review): presumably the offset of the count field inside a
    /// cluster-index entry — confirm.
    pub const CLUSTER_IDX_COUNT_OFFSET: usize = 4;

    /// Outer header field offsets. `VERSION_OFF` equals [`MAGIC_BYTES`].
    pub mod outer_hdr {
        pub const VERSION_OFF: usize = 8;
        pub const N_COLUMNS_OFF: usize = 12;
        pub const N_DOCS_OFF: usize = 16;
        pub const DIR_OFFSET_OFF: usize = 24;
    }

    /// Directory entry field offsets.
    /// NOTE(review): no constants are defined for offset 0 or for the bytes
    /// between `SUMMARY_ABS_OFF` and `CODEC_ID_OFF` — confirm the full layout.
    pub mod dir_entry {
        pub const DIM_OFF: usize = 4;
        pub const N_CENT_OFF: usize = 8;
        pub const METRIC_ID_OFF: usize = 12;
        pub const ROT_SEED_OFF: usize = 16;
        pub const SUBSECTION_OFF_OFF: usize = 24;
        pub const SUBSECTION_LEN_OFF: usize = 32;
        pub const SUMMARY_ABS_OFF: usize = 40;
        pub const CODEC_ID_OFF: usize = 52;
        pub const CODEC_META_OFF_OFF: usize = 56;
        pub const CODEC_META_SIZE_OFF: usize = 60;
    }

    /// Subsection header field offsets. `VERSION_OFF` equals [`MAGIC_BYTES`].
    pub mod sub_hdr {
        pub const VERSION_OFF: usize = 8;
        pub const CODEC_META_SIZE_OFF: usize = 12;
        pub const SUMMARY_OFF_OFF: usize = 16;
        pub const SUMMARY_RADIUS_X100_OFF: usize = 24;
        pub const CENTROIDS_OFF_OFF: usize = 32;
        pub const CLUSTER_IDX_OFF_OFF: usize = 40;
        pub const PER_CLUSTER_BLOCKS_OFF_OFF: usize = 48;
    }
}
/// Key names (all starting with `inf.`) and the key groups built from them.
///
/// NOTE(review): where these key/value pairs are stored is not visible in this
/// module — confirm with the footer/metadata code.
pub mod kv {
    // ---- keys that make up `REQUIRED` ----
    pub const FORMAT: &str = "inf.format";
    pub const FORMAT_VERSION: &str = "inf.format_version";
    pub const ID_COLUMN: &str = "inf.id_column";
    pub const N_DOCS: &str = "inf.n_docs";
    pub const BUILDER: &str = "inf.builder";

    // ---- keys that make up `FTS_KEYS` ----
    pub const FTS_OFFSET: &str = "inf.fts.offset";
    pub const FTS_LENGTH: &str = "inf.fts.length";
    pub const FTS_COLUMNS: &str = "inf.fts.columns";

    // ---- keys that make up `VEC_KEYS` ----
    pub const VEC_OFFSET: &str = "inf.vec.offset";
    pub const VEC_LENGTH: &str = "inf.vec.length";
    pub const VEC_COLUMNS: &str = "inf.vec.columns";

    /// Sentinel string; presumably the value expected under [`FORMAT`] — confirm.
    pub const FORMAT_VALUE: &str = "infino-superfile";

    /// The five general keys, in declaration order.
    /// NOTE(review): name implies these must always be present — confirm with the reader.
    pub const REQUIRED: &[&str] = &[FORMAT, FORMAT_VERSION, ID_COLUMN, N_DOCS, BUILDER];
    /// The three `inf.fts.*` keys.
    pub const FTS_KEYS: &[&str] = &[FTS_OFFSET, FTS_LENGTH, FTS_COLUMNS];
    /// The three `inf.vec.*` keys.
    pub const VEC_KEYS: &[&str] = &[VEC_OFFSET, VEC_LENGTH, VEC_COLUMNS];

    /// Every key defined above: [`REQUIRED`], then [`FTS_KEYS`], then
    /// [`VEC_KEYS`], each in its own order.
    pub const ALL: &[&str] = &[
        // general
        FORMAT,
        FORMAT_VERSION,
        ID_COLUMN,
        N_DOCS,
        BUILDER,
        // fts
        FTS_OFFSET,
        FTS_LENGTH,
        FTS_COLUMNS,
        // vec
        VEC_OFFSET,
        VEC_LENGTH,
        VEC_COLUMNS,
    ];
}
/// Prefix shared by every key in `kv::ALL` (asserted by the unit tests in this file).
/// NOTE(review): name suggests keys with this prefix are reserved for the format itself — confirm.
pub const RESERVED_PREFIX: &str = "inf.";
/// Separator byte `0x1F` (ASCII Unit Separator); it sorts below the space character.
/// NOTE(review): presumably delimits the parts of a key stored in the FST — confirm with the FTS builder.
pub const FST_SEPARATOR: u8 = 0x1F;
/// A three-component `major.minor.patch` version number.
///
/// Instances are compared against [`FORMAT_VERSION`] via
/// [`Version::is_compatible_with_current`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Version {
    /// Major component; the only one consulted by the compatibility check.
    pub major: u32,
    /// Minor component.
    pub minor: u32,
    /// Patch component.
    pub patch: u32,
}

impl Version {
    /// Parses a strict `MAJOR.MINOR.PATCH` string.
    ///
    /// Returns `None` unless `s` consists of exactly three `.`-separated
    /// components, each a non-empty run of ASCII decimal digits whose value
    /// fits in a `u32`. Signs (`+` as well as `-`), whitespace, and
    /// pre-release or build suffixes are rejected. Leading zeros are accepted
    /// (`"01.2.3"` parses as `1.2.3`).
    pub fn parse(s: &str) -> Option<Self> {
        let mut components = s.split('.');
        let major = Self::parse_component(components.next()?)?;
        let minor = Self::parse_component(components.next()?)?;
        let patch = Self::parse_component(components.next()?)?;
        // A fourth component (e.g. "1.0.0.0") makes the string malformed.
        if components.next().is_some() {
            return None;
        }
        Some(Version {
            major,
            minor,
            patch,
        })
    }

    /// Parses one version component made up solely of ASCII digits.
    fn parse_component(part: &str) -> Option<u32> {
        // `u32::from_str` tolerates a leading `+`, which would let "+1.0.0"
        // slip through; insisting on digits only closes that gap. The empty
        // string passes this check but is rejected by `parse` itself.
        if !part.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        part.parse().ok()
    }

    /// Returns `true` when this version has the same major component as
    /// [`FORMAT_VERSION`]; minor and patch are ignored.
    ///
    /// # Panics
    ///
    /// Panics if [`FORMAT_VERSION`] is not a valid version string, which
    /// would be a bug in this crate's constants.
    pub fn is_compatible_with_current(&self) -> bool {
        let current =
            Version::parse(FORMAT_VERSION).expect("FORMAT_VERSION is a valid semver constant");
        self.major == current.major
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    // ---- magic bytes ----

    /// The project prefix is exactly the three bytes `INF`.
    #[test]
    fn project_magic_is_three_bytes() {
        assert_eq!(PROJECT_MAGIC.len(), 3);
        assert_eq!(PROJECT_MAGIC, b"INF");
    }

    /// The FTS magic is eight bytes long and begins with the project prefix.
    #[test]
    fn fts_magic_starts_with_project_magic() {
        assert_eq!(fts::MAGIC.len(), 8);
        assert_eq!(fts::MAGIC, b"INFFTS01");
        assert_eq!(&fts::MAGIC[..3], PROJECT_MAGIC);
    }

    /// The outer vector magic begins with the project prefix.
    #[test]
    fn vec_outer_magic_starts_with_project_magic() {
        assert_eq!(vec::OUTER_MAGIC, b"INFVEC01");
        assert_eq!(&vec::OUTER_MAGIC[..3], PROJECT_MAGIC);
    }

    /// The vector subsection magic begins with the project prefix.
    #[test]
    fn vec_sub_magic_starts_with_project_magic() {
        assert_eq!(vec::SUB_MAGIC, b"INFVECC1");
        assert_eq!(&vec::SUB_MAGIC[..3], PROJECT_MAGIC);
    }

    /// No two section magics may collide.
    #[test]
    fn three_magics_are_distinct() {
        let distinct: HashSet<&[u8]> = HashSet::from([
            fts::MAGIC.as_slice(),
            vec::OUTER_MAGIC.as_slice(),
            vec::SUB_MAGIC.as_slice(),
        ]);
        assert_eq!(
            distinct.len(),
            3,
            "FTS / vec-outer / vec-sub magics must be distinct"
        );
    }

    // ---- key/value keys ----

    /// Every key listed in `kv::ALL` carries the reserved prefix.
    #[test]
    fn all_kv_keys_have_inf_prefix() {
        for k in kv::ALL.iter() {
            assert!(
                k.starts_with(RESERVED_PREFIX),
                "KV key {k:?} should start with {RESERVED_PREFIX:?}"
            );
        }
    }

    /// `kv::ALL` contains no repeated key.
    #[test]
    fn all_kv_keys_are_unique() {
        let unique: HashSet<&str> = kv::ALL.iter().copied().collect();
        assert_eq!(unique.len(), kv::ALL.len(), "duplicate KV key in kv::ALL");
    }

    /// Every required key is also listed in `kv::ALL`.
    #[test]
    fn required_kv_keys_present_in_all() {
        for k in kv::REQUIRED.iter() {
            assert!(
                kv::ALL.contains(k),
                "required key {k:?} missing from kv::ALL"
            );
        }
    }

    /// Both per-section key groups are subsets of `kv::ALL`.
    #[test]
    fn fts_and_vec_key_groups_present_in_all() {
        for k in kv::FTS_KEYS.iter().chain(kv::VEC_KEYS.iter()) {
            assert!(kv::ALL.contains(k));
        }
    }

    // ---- Version ----

    /// Well-formed `major.minor.patch` strings parse into their components.
    #[test]
    fn version_parses_strict_semver() {
        let simple = Version {
            major: 1,
            minor: 0,
            patch: 0,
        };
        assert_eq!(Version::parse("1.0.0"), Some(simple));

        let multi_digit = Version {
            major: 12,
            minor: 34,
            patch: 567,
        };
        assert_eq!(Version::parse("12.34.567"), Some(multi_digit));
    }

    /// Anything that is not exactly three numeric components is rejected.
    #[test]
    fn version_rejects_malformed_strings() {
        // Wrong number of components.
        assert_eq!(Version::parse(""), None);
        assert_eq!(Version::parse("1"), None);
        assert_eq!(Version::parse("1.0"), None);
        assert_eq!(Version::parse("1.0.0.0"), None);
        // Non-numeric components and suffixes.
        assert_eq!(Version::parse("a.b.c"), None);
        assert_eq!(Version::parse("1.0.x"), None);
        assert_eq!(Version::parse("1.0.0-alpha"), None);
        assert_eq!(Version::parse("1.0.0+sha"), None);
        // Negative sign and surrounding whitespace.
        assert_eq!(Version::parse("-1.0.0"), None);
        assert_eq!(Version::parse(" 1.0.0"), None);
        assert_eq!(Version::parse("1.0.0 "), None);
    }

    /// The crate's own version constant must parse.
    #[test]
    fn current_format_version_is_valid_semver() {
        assert!(Version::parse(FORMAT_VERSION).is_some());
    }

    /// Compatibility follows the major component only.
    #[test]
    fn version_compat_matches_on_major() {
        let current = Version::parse(FORMAT_VERSION).expect("parse Version");
        assert!(current.is_compatible_with_current());

        let drifted = Version {
            major: current.major,
            minor: current.minor + 99,
            patch: current.patch + 99,
        };
        assert!(
            drifted.is_compatible_with_current(),
            "minor/patch drift is compatible"
        );

        let next_major = Version {
            major: current.major + 1,
            minor: 0,
            patch: 0,
        };
        assert!(
            !next_major.is_compatible_with_current(),
            "major bump is incompatible"
        );
    }

    // ---- miscellaneous constants ----

    /// The separator is a control byte below the space character.
    #[test]
    fn fst_separator_is_below_printable_ascii() {
        // Checked at compile time as well as at run time.
        const _: () = assert!(FST_SEPARATOR < b' ');
        assert_eq!(FST_SEPARATOR, 0x1F);
    }

    /// Guards against accidental edits to the format sentinel.
    #[test]
    fn format_value_sentinel_is_the_expected_string() {
        assert_eq!(kv::FORMAT_VALUE, "infino-superfile");
    }
}