use std::collections::HashMap;
use index_core::IndexDocument;
/// Version tag copied into the `transformer_version` field of every
/// [`TransformCacheKey`] built by [`TransformCacheKey::new`]; keys built under
/// a different tag compare unequal.
pub const TRANSFORMER_VERSION: &str = "transformer-v1";
/// Version tag copied into the `adapter_version` field of every
/// [`TransformCacheKey`] built by [`TransformCacheKey::new`]; keys built under
/// a different tag compare unequal.
pub const ADAPTER_REGISTRY_VERSION: &str = "adapters-v1";
/// Lookup key for a cached transformation result.
///
/// Two keys are equal (and hash identically) only when all four fields match,
/// so a change to any one of them yields a different cache slot.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct TransformCacheKey {
    /// Optional source URL the key was built with.
    source_url: Option<String>,
    /// Hex digest of the content the key was built from.
    content_hash: String,
    /// Adapter registry version tag recorded when the key was built.
    adapter_version: String,
    /// Transformer version tag recorded when the key was built.
    transformer_version: String,
}
impl TransformCacheKey {
    /// Builds a key for `content`, optionally associated with `source_url`.
    ///
    /// The content itself is not stored; only its [`stable_hash_hex`] digest
    /// is kept. The current [`ADAPTER_REGISTRY_VERSION`] and
    /// [`TRANSFORMER_VERSION`] tags are recorded alongside it.
    #[must_use]
    pub fn new(source_url: Option<&str>, content: &str) -> Self {
        let source_url = source_url.map(String::from);
        let content_hash = stable_hash_hex(content);
        let adapter_version = String::from(ADAPTER_REGISTRY_VERSION);
        let transformer_version = String::from(TRANSFORMER_VERSION);

        Self {
            source_url,
            content_hash,
            adapter_version,
            transformer_version,
        }
    }

    /// Returns the source URL this key was built with, if any.
    #[must_use]
    pub fn source_url(&self) -> Option<&str> {
        self.source_url.as_deref()
    }

    /// Returns the hex digest of the content this key was built from.
    #[must_use]
    pub fn content_hash(&self) -> &str {
        self.content_hash.as_str()
    }
}
/// In-memory map from [`TransformCacheKey`] to the transformed
/// [`IndexDocument`] stored under that key.
#[derive(Clone, Debug, Default)]
pub struct TransformedDocumentCache {
    /// Stored documents, indexed by the key they were inserted under.
    entries: HashMap<TransformCacheKey, IndexDocument>,
}
impl TransformedDocumentCache {
    /// Creates a cache with no entries.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Looks up `key` and returns a clone of the stored document, or `None`
    /// when nothing is stored under an equal key.
    #[must_use]
    pub fn get(&self, key: &TransformCacheKey) -> Option<IndexDocument> {
        let hit = self.entries.get(key);
        hit.cloned()
    }

    /// Stores `document` under `key`.
    ///
    /// A document previously stored under an equal key is replaced and
    /// dropped.
    pub fn insert(&mut self, key: TransformCacheKey, document: IndexDocument) {
        let _replaced = self.entries.insert(key, document);
    }

    /// Returns the number of stored documents.
    #[must_use]
    pub fn len(&self) -> usize {
        self.entries.len()
    }

    /// Returns `true` when no documents are stored.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }
}
/// Hashes the UTF-8 bytes of `content` with 64-bit FNV-1a and returns the
/// digest as 16 lowercase, zero-padded hexadecimal digits.
///
/// The result depends only on the input bytes (no random seed), so the same
/// content always yields the same string. FNV-1a is not a cryptographic hash;
/// distinct inputs can collide.
#[must_use]
pub fn stable_hash_hex(content: &str) -> String {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a order: xor the byte in first, then multiply by the prime.
    let digest = content
        .bytes()
        .fold(OFFSET_BASIS, |acc, byte| (acc ^ u64::from(byte)).wrapping_mul(PRIME));

    format!("{digest:016x}")
}
#[cfg(test)]
mod tests {
    use index_core::{IndexDocument, IndexNode};

    use super::{TransformCacheKey, TransformedDocumentCache, stable_hash_hex};

    /// Builds a small document with the given title and a single paragraph.
    fn document(title: &str) -> IndexDocument {
        let mut document = IndexDocument::titled(title);
        document.push(IndexNode::Paragraph("cached".to_owned()));
        document
    }

    #[test]
    fn cache_roundtrips_transformed_documents() {
        let key = TransformCacheKey::new(Some("https://example.org"), "<p>hello</p>");
        let mut cache = TransformedDocumentCache::new();

        // A fresh cache holds nothing and misses on every key.
        assert!(cache.is_empty());
        assert_eq!(cache.len(), 0);
        assert!(cache.get(&key).is_none());

        cache.insert(key.clone(), document("Cached"));

        assert_eq!(cache.len(), 1);
        assert!(!cache.is_empty());
        assert!(matches!(cache.get(&key), Some(document) if document.title == "Cached"));
    }

    #[test]
    fn cache_misses_for_a_key_that_was_never_inserted() {
        let stored = TransformCacheKey::new(Some("https://example.org/a"), "<p>hello</p>");
        let other = TransformCacheKey::new(Some("https://example.org/b"), "<p>hello</p>");
        let mut cache = TransformedDocumentCache::new();
        cache.insert(stored, document("Cached"));

        assert!(cache.get(&other).is_none());
        assert_eq!(cache.len(), 1);
    }

    #[test]
    fn cache_insert_replaces_entry_with_equal_key() {
        let key = TransformCacheKey::new(Some("https://example.org"), "<p>hello</p>");
        let mut cache = TransformedDocumentCache::new();
        cache.insert(key.clone(), document("First"));
        cache.insert(key.clone(), document("Second"));

        // The second insert overwrites rather than adding a new slot.
        assert_eq!(cache.len(), 1);
        assert!(matches!(cache.get(&key), Some(document) if document.title == "Second"));
    }

    #[test]
    fn cache_key_exposes_source_url_and_content_hash() {
        let with_url = TransformCacheKey::new(Some("https://example.org"), "<p>hello</p>");
        let without_url = TransformCacheKey::new(None, "<p>hello</p>");

        assert_eq!(with_url.source_url(), Some("https://example.org"));
        assert_eq!(without_url.source_url(), None);
        assert_eq!(with_url.content_hash(), stable_hash_hex("<p>hello</p>"));
        assert_eq!(with_url.content_hash(), without_url.content_hash());
        assert_ne!(with_url, without_url);
    }

    #[test]
    fn cache_key_invalidates_on_source_content_or_url_change() {
        let base = TransformCacheKey::new(Some("https://example.org/a"), "<p>hello</p>");
        let changed_content = TransformCacheKey::new(Some("https://example.org/a"), "<p>bye</p>");
        let changed_url = TransformCacheKey::new(Some("https://example.org/b"), "<p>hello</p>");

        assert_ne!(base, changed_content);
        assert_ne!(base, changed_url);
    }

    #[test]
    fn cache_key_invalidates_on_adapter_or_transformer_version_change() {
        let base = TransformCacheKey::new(Some("https://example.org"), "content");
        // Struct-update syntax changes exactly one field and copies the rest.
        let changed_adapter = TransformCacheKey {
            adapter_version: "adapters-v-next".to_owned(),
            ..base.clone()
        };
        let changed_transformer = TransformCacheKey {
            transformer_version: "transformer-v-next".to_owned(),
            ..base.clone()
        };

        assert_ne!(base, changed_adapter);
        assert_ne!(base, changed_transformer);
    }

    #[test]
    fn stable_hash_is_deterministic() {
        assert_eq!(stable_hash_hex("Index"), stable_hash_hex("Index"));
        assert_ne!(stable_hash_hex("Index"), stable_hash_hex("index"));
    }

    #[test]
    fn stable_hash_matches_fnv1a_reference_digests() {
        // Pinning concrete digests catches accidental changes to the
        // constants or the xor/multiply order, which a self-comparison cannot.
        assert_eq!(stable_hash_hex(""), "cbf29ce484222325");
        assert_eq!(stable_hash_hex("a"), "af63dc4c8601ec8c");
    }
}