use crate::{
db::{
commit::CommitSchemaFingerprint, index::canonical_index_predicate,
predicate::hash_predicate,
},
model::{
EntityModel,
index::{IndexKeyItem, IndexKeyItemsRef, IndexModel},
},
traits::EntityKind,
};
use icydb_utils::Xxh3;
use sha2::{Digest, Sha256};
use std::{any::TypeId, cell::RefCell, collections::HashMap};
// Per-thread memo of commit schema fingerprints, keyed by the `TypeId` of the
// entity type. It is read and filled by `commit_schema_fingerprint_for_entity`.
thread_local! {
static ENTITY_COMMIT_SCHEMA_FINGERPRINT_CACHE:
RefCell<HashMap<TypeId, CommitSchemaFingerprint>> = RefCell::new(HashMap::new());
}
/// Version byte fed into the hasher before any other fingerprint input.
const COMMIT_SCHEMA_FINGERPRINT_VERSION: u8 = 2;
/// Tag hashed under the `index_key_item_kind` label for a plain field key item.
const INDEX_KEY_ITEM_FIELD_TAG: u8 = 0x00;
/// Tag hashed under the `index_key_item_kind` label for an expression key item.
const INDEX_KEY_ITEM_EXPRESSION_TAG: u8 = 0x01;
/// Tag hashed under the `index_predicate_kind` label when the index has no canonical predicate.
const INDEX_PREDICATE_NONE_TAG: u8 = 0x00;
/// Tag hashed under the `index_predicate_kind` label when a predicate digest follows.
const INDEX_PREDICATE_SEMANTIC_TAG: u8 = 0x01;
/// Returns the commit schema fingerprint for the entity type `E`.
///
/// The value is memoized per thread in `ENTITY_COMMIT_SCHEMA_FINGERPRINT_CACHE`
/// under `TypeId::of::<E>()`. On a miss it is computed from `E::PATH` and
/// `E::MODEL` via `commit_schema_fingerprint_for_model`, stored, and returned.
#[must_use]
pub(crate) fn commit_schema_fingerprint_for_entity<E: EntityKind + 'static>()
-> CommitSchemaFingerprint {
    let type_key = TypeId::of::<E>();
    let memoized = ENTITY_COMMIT_SCHEMA_FINGERPRINT_CACHE
        .with_borrow(|known| known.get(&type_key).copied());

    memoized.unwrap_or_else(|| {
        // The shared borrow taken for the lookup has already ended here, so the
        // cache is not borrowed while the fingerprint is being computed.
        let computed = commit_schema_fingerprint_for_model(E::PATH, E::MODEL);
        ENTITY_COMMIT_SCHEMA_FINGERPRINT_CACHE.with_borrow_mut(|known| {
            known.insert(type_key, computed);
        });
        computed
    })
}
/// Computes the commit schema fingerprint for `model` registered at `entity_path`.
///
/// A zero-seeded `Xxh3` hasher receives, in order: the fingerprint version byte,
/// the labeled entity path, and the model contents as hashed by
/// `hash_entity_model_for_commit`. The 128-bit digest is returned as
/// big-endian bytes.
#[must_use]
pub(crate) fn commit_schema_fingerprint_for_model(
    entity_path: &'static str,
    model: &'static EntityModel,
) -> CommitSchemaFingerprint {
    let mut state = Xxh3::with_seed(0);

    // The version byte goes first, ahead of every other input.
    let version_prefix = [COMMIT_SCHEMA_FINGERPRINT_VERSION];
    state.update(&version_prefix);

    hash_labeled_str(&mut state, "entity_path", entity_path);
    hash_entity_model_for_commit(&mut state, model);

    let digest = state.digest128();
    digest.to_be_bytes()
}
/// Feeds the commit-relevant parts of `model` into `hasher`.
///
/// Hashed in order: model path, entity name, primary-key field name, the field
/// count followed by every field name, then the index count followed by each
/// index's name, store, uniqueness byte, key items, and predicate.
fn hash_entity_model_for_commit(hasher: &mut Xxh3, model: &EntityModel) {
    // Entity identity.
    hash_labeled_str(hasher, "model_path", model.path);
    hash_labeled_str(hasher, "entity_name", model.entity_name);
    hash_labeled_str(hasher, "primary_key", model.primary_key.name);

    // Field names, preceded by how many there are.
    let declared_fields = model.fields;
    hash_labeled_len(hasher, "field_count", declared_fields.len());
    declared_fields
        .iter()
        .for_each(|declared| hash_labeled_str(hasher, "field_name", declared.name));

    // Index contracts, preceded by how many there are.
    let declared_indexes = model.indexes;
    hash_labeled_len(hasher, "index_count", declared_indexes.len());
    for index_model in declared_indexes {
        hash_labeled_str(hasher, "index_name", index_model.name());
        hash_labeled_str(hasher, "index_store", index_model.store());

        // Uniqueness is hashed as a single unlabeled byte (0 or 1).
        let unique_byte = u8::from(index_model.is_unique());
        hasher.update(&[unique_byte]);

        hash_index_key_items_contract(hasher, index_model);
        hash_index_predicate_contract(hasher, index_model);
    }
}
/// Hashes the key-item list of `index` into `hasher`.
///
/// Both representations returned by `index.key_items()` start with the item
/// count under the `index_field_count` label. A plain field contributes the
/// field tag and its name; an expression contributes the expression tag, the
/// expression's kind tag, and the field it refers to.
fn hash_index_key_items_contract(hasher: &mut Xxh3, index: &IndexModel) {
    // Shared by both representations so a plain field is hashed the same way
    // whether it arrives as a bare name or as `IndexKeyItem::Field`.
    fn hash_plain_field(hasher: &mut Xxh3, field_name: &str) {
        hash_labeled_tag(hasher, "index_key_item_kind", INDEX_KEY_ITEM_FIELD_TAG);
        hash_labeled_str(hasher, "index_field_name", field_name);
    }

    match index.key_items() {
        IndexKeyItemsRef::Fields(field_names) => {
            hash_labeled_len(hasher, "index_field_count", field_names.len());
            for field_name in field_names {
                hash_plain_field(hasher, field_name);
            }
        }
        IndexKeyItemsRef::Items(key_items) => {
            hash_labeled_len(hasher, "index_field_count", key_items.len());
            for key_item in key_items {
                match key_item {
                    IndexKeyItem::Field(field_name) => hash_plain_field(hasher, field_name),
                    IndexKeyItem::Expression(expr) => {
                        hash_labeled_tag(
                            hasher,
                            "index_key_item_kind",
                            INDEX_KEY_ITEM_EXPRESSION_TAG,
                        );
                        hash_labeled_tag(hasher, "index_expression_kind", expr.kind_tag());
                        hash_labeled_str(hasher, "index_expression_field", expr.field());
                    }
                }
            }
        }
    }
}
/// Hashes the predicate contract of `index` into `hasher`.
///
/// When `canonical_index_predicate` yields nothing, only the "none" tag is
/// hashed. Otherwise the "semantic" tag is hashed, followed by the length and
/// bytes of a SHA-256 digest produced by running `hash_predicate` over the
/// predicate.
fn hash_index_predicate_contract(hasher: &mut Xxh3, index: &IndexModel) {
    let Some(predicate) = canonical_index_predicate(index) else {
        hash_labeled_tag(hasher, "index_predicate_kind", INDEX_PREDICATE_NONE_TAG);
        return;
    };

    hash_labeled_tag(hasher, "index_predicate_kind", INDEX_PREDICATE_SEMANTIC_TAG);

    // The predicate is digested separately, then folded in length-prefixed.
    let mut semantic_hasher = Sha256::new();
    hash_predicate(&mut semantic_hasher, predicate);
    let semantic_digest = semantic_hasher.finalize();

    hash_labeled_len(
        hasher,
        "index_predicate_semantic_hash_len",
        semantic_digest.len(),
    );
    hasher.update(semantic_digest.as_slice());
}
/// Hashes the bytes of `label` followed by the single byte `tag`.
fn hash_labeled_tag(hasher: &mut Xxh3, label: &str, tag: u8) {
    let tag_byte = [tag];
    hasher.update(label.as_bytes());
    hasher.update(&tag_byte);
}
/// Hashes `label` with the byte length of `value` (via `hash_labeled_len`),
/// followed by the UTF-8 bytes of `value`.
fn hash_labeled_str(hasher: &mut Xxh3, label: &str, value: &str) {
    let payload = value.as_bytes();
    hash_labeled_len(hasher, label, payload.len());
    hasher.update(payload);
}
/// Hashes the bytes of `label` followed by `len` as a big-endian `u64`.
///
/// A `len` that does not fit in `u64` is hashed as `u64::MAX`.
fn hash_labeled_len(hasher: &mut Xxh3, label: &str, len: usize) {
    let widened = match u64::try_from(len) {
        Ok(exact) => exact,
        Err(_) => u64::MAX,
    };
    let encoded = widened.to_be_bytes();

    hasher.update(label.as_bytes());
    hasher.update(&encoded);
}
// Unit tests checking which index metadata changes do and do not affect the
// commit schema fingerprint.
#[cfg(test)]
mod tests {
use crate::{
db::Predicate,
db::schema::fingerprint::{hash_entity_model_for_commit, hash_labeled_str},
model::{
entity::EntityModel,
field::{FieldKind, FieldModel},
index::{IndexExpression, IndexKeyItem, IndexModel, IndexPredicateMetadata},
},
};
use icydb_utils::Xxh3;
use std::sync::LazyLock;
// Every fixture index is declared over the single field `active`.
const INDEX_FIELDS: [&str; 1] = ["active"];
// Field list shared by all fixture entity models: `id` (Ulid) and `active` (Bool).
static FIELD_MODELS: [FieldModel; 2] = [
FieldModel::generated("id", FieldKind::Ulid),
FieldModel::generated("active", FieldKind::Bool),
];
// Lazily built predicates comparing `active` for equality with `true` / `false`.
static ACTIVE_TRUE_PREDICATE: LazyLock<Predicate> =
LazyLock::new(|| Predicate::eq("active".to_string(), true.into()));
static ACTIVE_FALSE_PREDICATE: LazyLock<Predicate> =
LazyLock::new(|| Predicate::eq("active".to_string(), false.into()));
// Accessor functions handed to `IndexPredicateMetadata::generated` below.
fn active_true_predicate() -> &'static Predicate {
&ACTIVE_TRUE_PREDICATE
}
fn active_false_predicate() -> &'static Predicate {
&ACTIVE_FALSE_PREDICATE
}
// Builds predicate metadata that always resolves to the `active == true`
// predicate, with a caller-chosen SQL text.
const fn active_true_predicate_metadata(sql: &'static str) -> IndexPredicateMetadata {
IndexPredicateMetadata::generated(sql, active_true_predicate)
}
// Builds predicate metadata for the `active == false` predicate.
const fn active_false_predicate_metadata() -> IndexPredicateMetadata {
IndexPredicateMetadata::generated("active = false", active_false_predicate)
}
// `..._TRUE_A` and `..._TRUE_B` differ only in the spacing of the SQL text
// ("active = true" vs "active=true"); both use the same predicate accessor.
static INDEX_MODEL_PRED_TRUE_A: IndexModel = IndexModel::generated_with_predicate(
"entity|active",
"entity::store",
&INDEX_FIELDS,
false,
Some(active_true_predicate_metadata("active = true")),
);
static INDEX_MODEL_PRED_TRUE_B: IndexModel = IndexModel::generated_with_predicate(
"entity|active",
"entity::store",
&INDEX_FIELDS,
false,
Some(active_true_predicate_metadata("active=true")),
);
// Same index declaration as above, but with the `active == false` predicate.
static INDEX_MODEL_PRED_FALSE: IndexModel = IndexModel::generated_with_predicate(
"entity|active",
"entity::store",
&INDEX_FIELDS,
false,
Some(active_false_predicate_metadata()),
);
// Explicit key-item list holding one plain field item for `active`.
static INDEX_KEY_ITEMS_FIELD: [IndexKeyItem; 1] = [IndexKeyItem::Field("active")];
static INDEX_MODEL_KEY_ITEMS_FIELD: IndexModel =
IndexModel::generated_with_key_items_and_predicate(
"entity|active",
"entity::store",
&INDEX_FIELDS,
Some(&INDEX_KEY_ITEMS_FIELD),
false,
Some(active_true_predicate_metadata("active=true")),
);
// Explicit key-item list holding one `Lower("active")` expression item.
static INDEX_KEY_ITEMS_EXPR: [IndexKeyItem; 1] =
[IndexKeyItem::Expression(IndexExpression::Lower("active"))];
static INDEX_MODEL_KEY_ITEMS_EXPR: IndexModel =
IndexModel::generated_with_key_items_and_predicate(
"entity|active",
"entity::store",
&INDEX_FIELDS,
Some(&INDEX_KEY_ITEMS_EXPR),
false,
Some(active_true_predicate_metadata("active=true")),
);
// One-element index lists, one per fixture index model.
static INDEX_REFS_TRUE_A: [&IndexModel; 1] = [&INDEX_MODEL_PRED_TRUE_A];
static INDEX_REFS_TRUE_B: [&IndexModel; 1] = [&INDEX_MODEL_PRED_TRUE_B];
static INDEX_REFS_FALSE: [&IndexModel; 1] = [&INDEX_MODEL_PRED_FALSE];
static INDEX_REFS_KEY_ITEMS_FIELD: [&IndexModel; 1] = [&INDEX_MODEL_KEY_ITEMS_FIELD];
static INDEX_REFS_KEY_ITEMS_EXPR: [&IndexModel; 1] = [&INDEX_MODEL_KEY_ITEMS_EXPR];
// Entity models below are constructed with identical arguments except for the
// index list passed last.
static MODEL_TRUE_A: EntityModel = EntityModel::generated(
"fingerprint::Entity",
"Entity",
&FIELD_MODELS[0],
0,
&FIELD_MODELS,
&INDEX_REFS_TRUE_A,
);
static MODEL_TRUE_B: EntityModel = EntityModel::generated(
"fingerprint::Entity",
"Entity",
&FIELD_MODELS[0],
0,
&FIELD_MODELS,
&INDEX_REFS_TRUE_B,
);
static MODEL_FALSE: EntityModel = EntityModel::generated(
"fingerprint::Entity",
"Entity",
&FIELD_MODELS[0],
0,
&FIELD_MODELS,
&INDEX_REFS_FALSE,
);
static MODEL_KEY_ITEMS_FIELD: EntityModel = EntityModel::generated(
"fingerprint::Entity",
"Entity",
&FIELD_MODELS[0],
0,
&FIELD_MODELS,
&INDEX_REFS_KEY_ITEMS_FIELD,
);
static MODEL_KEY_ITEMS_EXPR: EntityModel = EntityModel::generated(
"fingerprint::Entity",
"Entity",
&FIELD_MODELS[0],
0,
&FIELD_MODELS,
&INDEX_REFS_KEY_ITEMS_EXPR,
);
// Test-local fingerprint helper: performs the same hashing steps as
// `commit_schema_fingerprint_for_model` (version byte, labeled entity path,
// model contents), using `model.path()` as the entity path.
fn fingerprint_for_model(model: &EntityModel) -> [u8; 16] {
let mut hasher = Xxh3::with_seed(0);
hasher.update(&[super::COMMIT_SCHEMA_FINGERPRINT_VERSION]);
hash_labeled_str(&mut hasher, "entity_path", model.path());
hash_entity_model_for_commit(&mut hasher, model);
hasher.digest128().to_be_bytes()
}
// Switching the predicate from `active == true` to `active == false` must
// produce a different fingerprint.
#[test]
fn schema_fingerprint_changes_when_index_predicate_semantics_change() {
assert_ne!(
fingerprint_for_model(&MODEL_TRUE_A),
fingerprint_for_model(&MODEL_FALSE),
"semantic predicate changes must change commit schema fingerprint",
);
}
// Two models whose predicate metadata differs only in SQL text spacing must
// produce the same fingerprint.
#[test]
fn schema_fingerprint_is_stable_for_equivalent_index_predicate_sql_text() {
assert_eq!(
fingerprint_for_model(&MODEL_TRUE_A),
fingerprint_for_model(&MODEL_TRUE_B),
"equivalent predicate SQL text should hash to the same semantic schema fingerprint",
);
}
// An explicit key-item list containing only `Field("active")` must hash the
// same as the index declared without an explicit key-item list.
#[test]
fn schema_fingerprint_preserves_field_only_parity_for_key_item_metadata() {
assert_eq!(
fingerprint_for_model(&MODEL_TRUE_A),
fingerprint_for_model(&MODEL_KEY_ITEMS_FIELD),
"field-only key-item metadata should hash identically to field metadata",
);
}
// Replacing the plain field key item with a `Lower("active")` expression must
// produce a different fingerprint.
#[test]
fn schema_fingerprint_changes_when_expression_key_item_semantics_change() {
assert_ne!(
fingerprint_for_model(&MODEL_TRUE_A),
fingerprint_for_model(&MODEL_KEY_ITEMS_EXPR),
"expression key-item metadata must contribute to schema fingerprint semantics",
);
}
}