Skip to main content

index_transformer/
cache.rs

1//! Transformed document cache primitives.
2
3use std::collections::HashMap;
4
5use index_core::IndexDocument;
6
/// Current transformer cache format version.
///
/// [`TransformCacheKey::new`] copies this value into every key it builds, so
/// keys built under a different value compare unequal.
pub const TRANSFORMER_VERSION: &str = "transformer-v1";

/// Current adapter registry cache format version.
///
/// [`TransformCacheKey::new`] copies this value into every key it builds, so
/// keys built under a different value compare unequal.
pub const ADAPTER_REGISTRY_VERSION: &str = "adapters-v1";
12
/// Stable key for transformed document cache entries.
///
/// Equality and hashing are derived field by field, so two keys match only
/// when the source URL, content hash, adapter version, and transformer
/// version are all identical.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TransformCacheKey {
    /// Source URL supplied by the caller, if any.
    source_url: Option<String>,
    /// Hex digest of the source content, as produced by [`stable_hash_hex`].
    content_hash: String,
    /// Adapter registry version recorded when the key was built.
    adapter_version: String,
    /// Transformer version recorded when the key was built.
    transformer_version: String,
}
21
22impl TransformCacheKey {
23    /// Builds a cache key from a source URL and source bytes.
24    #[must_use]
25    pub fn new(source_url: Option<&str>, content: &str) -> Self {
26        Self {
27            source_url: source_url.map(str::to_owned),
28            content_hash: stable_hash_hex(content),
29            adapter_version: ADAPTER_REGISTRY_VERSION.to_owned(),
30            transformer_version: TRANSFORMER_VERSION.to_owned(),
31        }
32    }
33
34    /// Returns the source URL component.
35    #[must_use]
36    pub fn source_url(&self) -> Option<&str> {
37        self.source_url.as_deref()
38    }
39
40    /// Returns the stable content hash component.
41    #[must_use]
42    pub fn content_hash(&self) -> &str {
43        &self.content_hash
44    }
45}
46
/// In-memory transformed document cache for repeated browsing workflows.
///
/// The derived `Default` yields a cache with no entries, and the derived
/// `Clone` copies every stored document.
#[derive(Debug, Clone, Default)]
pub struct TransformedDocumentCache {
    /// Transformed documents, looked up by their [`TransformCacheKey`].
    entries: HashMap<TransformCacheKey, IndexDocument>,
}
52
53impl TransformedDocumentCache {
54    /// Creates an empty transformed document cache.
55    #[must_use]
56    pub fn new() -> Self {
57        Self::default()
58    }
59
60    /// Returns the cached document for a matching key.
61    #[must_use]
62    pub fn get(&self, key: &TransformCacheKey) -> Option<IndexDocument> {
63        self.entries.get(key).cloned()
64    }
65
66    /// Stores a transformed document.
67    pub fn insert(&mut self, key: TransformCacheKey, document: IndexDocument) {
68        self.entries.insert(key, document);
69    }
70
71    /// Returns the number of transformed documents currently retained.
72    #[must_use]
73    pub fn len(&self) -> usize {
74        self.entries.len()
75    }
76
77    /// Returns whether the cache is empty.
78    #[must_use]
79    pub fn is_empty(&self) -> bool {
80        self.entries.is_empty()
81    }
82}
83
/// Hashes `content` with 64-bit FNV-1a and renders the digest as hex.
///
/// The result is always 16 lowercase hexadecimal digits (zero-padded) and
/// depends only on the UTF-8 bytes of `content`, so it is deterministic
/// across runs and needs no external dependency.
#[must_use]
pub fn stable_hash_hex(content: &str) -> String {
    /// FNV-1a 64-bit offset basis (the digest of the empty input).
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    /// FNV 64-bit prime.
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a order: XOR the byte in first, then multiply (wrapping mod 2^64).
    let digest = content
        .bytes()
        .fold(OFFSET_BASIS, |acc, byte| (acc ^ u64::from(byte)).wrapping_mul(PRIME));
    format!("{digest:016x}")
}
94
#[cfg(test)]
mod tests {
    use index_core::{IndexDocument, IndexNode};

    use super::{TransformCacheKey, TransformedDocumentCache, stable_hash_hex};

    /// Builds a minimal one-paragraph document carrying the given title.
    fn document(title: &str) -> IndexDocument {
        let mut document = IndexDocument::titled(title);
        document.push(IndexNode::Paragraph("cached".to_owned()));
        document
    }

    #[test]
    fn cache_roundtrips_transformed_documents() {
        let key = TransformCacheKey::new(Some("https://example.org"), "<p>hello</p>");
        let mut cache = TransformedDocumentCache::new();
        assert!(cache.is_empty());
        assert_eq!(cache.len(), 0);

        cache.insert(key.clone(), document("Cached"));

        let cached = cache.get(&key);

        assert_eq!(cache.len(), 1);
        assert!(!cache.is_empty());
        assert!(matches!(cached, Some(document) if document.title == "Cached"));
    }

    #[test]
    fn cache_misses_on_unknown_key() {
        let stored = TransformCacheKey::new(Some("https://example.org"), "<p>hello</p>");
        let other = TransformCacheKey::new(Some("https://example.org"), "<p>bye</p>");
        let mut cache = TransformedDocumentCache::new();
        cache.insert(stored, document("Cached"));

        assert!(cache.get(&other).is_none());
    }

    #[test]
    fn cache_insert_replaces_existing_entry() {
        let key = TransformCacheKey::new(Some("https://example.org"), "<p>hello</p>");
        let mut cache = TransformedDocumentCache::new();
        cache.insert(key.clone(), document("First"));
        cache.insert(key.clone(), document("Second"));

        assert_eq!(cache.len(), 1);
        assert!(matches!(cache.get(&key), Some(document) if document.title == "Second"));
    }

    #[test]
    fn cache_key_exposes_source_url_and_content_hash() {
        let with_url = TransformCacheKey::new(Some("https://example.org"), "<p>hello</p>");
        let without_url = TransformCacheKey::new(None, "<p>hello</p>");

        assert_eq!(with_url.source_url(), Some("https://example.org"));
        assert_eq!(without_url.source_url(), None);
        assert_eq!(with_url.content_hash(), stable_hash_hex("<p>hello</p>"));
        assert_eq!(with_url.content_hash(), without_url.content_hash());
    }

    #[test]
    fn cache_key_invalidates_on_source_content_or_url_change() {
        let base = TransformCacheKey::new(Some("https://example.org/a"), "<p>hello</p>");
        let changed_content = TransformCacheKey::new(Some("https://example.org/a"), "<p>bye</p>");
        let changed_url = TransformCacheKey::new(Some("https://example.org/b"), "<p>hello</p>");
        let missing_url = TransformCacheKey::new(None, "<p>hello</p>");

        assert_ne!(base, changed_content);
        assert_ne!(base, changed_url);
        assert_ne!(base, missing_url);
    }

    #[test]
    fn cache_key_invalidates_on_adapter_or_transformer_version_change() {
        let base = TransformCacheKey::new(Some("https://example.org"), "content");
        let changed_adapter = TransformCacheKey {
            source_url: base.source_url.clone(),
            content_hash: base.content_hash.clone(),
            adapter_version: "adapters-v-next".to_owned(),
            transformer_version: base.transformer_version.clone(),
        };
        let changed_transformer = TransformCacheKey {
            source_url: base.source_url.clone(),
            content_hash: base.content_hash.clone(),
            adapter_version: base.adapter_version.clone(),
            transformer_version: "transformer-v-next".to_owned(),
        };

        assert_ne!(base, changed_adapter);
        assert_ne!(base, changed_transformer);
    }

    #[test]
    fn stable_hash_is_deterministic() {
        assert_eq!(stable_hash_hex("Index"), stable_hash_hex("Index"));
        assert_ne!(stable_hash_hex("Index"), stable_hash_hex("index"));
    }

    /// Pins the digest to published 64-bit FNV-1a vectors so that an
    /// accidental change of algorithm or output format fails loudly instead
    /// of silently changing every content hash.
    #[test]
    fn stable_hash_matches_fnv1a_reference_vectors() {
        assert_eq!(stable_hash_hex(""), "cbf29ce484222325");
        assert_eq!(stable_hash_hex("a"), "af63dc4c8601ec8c");
        // Exercises the zero-padding to 16 hex digits.
        assert_eq!(stable_hash_hex("ab"), "089c4407b545986a");
    }
}