index_transformer/
cache.rs1use std::collections::HashMap;
4
5use index_core::IndexDocument;
6
/// Version tag of the transformer.
///
/// `TransformCacheKey::new` copies this string into every key it builds, so
/// changing the value makes newly built keys compare unequal to keys built
/// under the previous value.
pub const TRANSFORMER_VERSION: &str = "transformer-v1";

/// Version tag of the adapter registry.
///
/// `TransformCacheKey::new` copies this string into every key it builds, so
/// changing the value makes newly built keys compare unequal to keys built
/// under the previous value.
pub const ADAPTER_REGISTRY_VERSION: &str = "adapters-v1";
12
13#[derive(Debug, Clone, PartialEq, Eq, Hash)]
15pub struct TransformCacheKey {
16 source_url: Option<String>,
17 content_hash: String,
18 adapter_version: String,
19 transformer_version: String,
20}
21
22impl TransformCacheKey {
23 #[must_use]
25 pub fn new(source_url: Option<&str>, content: &str) -> Self {
26 Self {
27 source_url: source_url.map(str::to_owned),
28 content_hash: stable_hash_hex(content),
29 adapter_version: ADAPTER_REGISTRY_VERSION.to_owned(),
30 transformer_version: TRANSFORMER_VERSION.to_owned(),
31 }
32 }
33
34 #[must_use]
36 pub fn source_url(&self) -> Option<&str> {
37 self.source_url.as_deref()
38 }
39
40 #[must_use]
42 pub fn content_hash(&self) -> &str {
43 &self.content_hash
44 }
45}
46
47#[derive(Debug, Clone, Default)]
49pub struct TransformedDocumentCache {
50 entries: HashMap<TransformCacheKey, IndexDocument>,
51}
52
53impl TransformedDocumentCache {
54 #[must_use]
56 pub fn new() -> Self {
57 Self::default()
58 }
59
60 #[must_use]
62 pub fn get(&self, key: &TransformCacheKey) -> Option<IndexDocument> {
63 self.entries.get(key).cloned()
64 }
65
66 pub fn insert(&mut self, key: TransformCacheKey, document: IndexDocument) {
68 self.entries.insert(key, document);
69 }
70
71 #[must_use]
73 pub fn len(&self) -> usize {
74 self.entries.len()
75 }
76
77 #[must_use]
79 pub fn is_empty(&self) -> bool {
80 self.entries.is_empty()
81 }
82}
83
/// Hashes `content` with 64-bit FNV-1a and returns the digest as exactly
/// 16 lowercase hexadecimal digits (zero-padded on the left).
///
/// The digest is computed over the UTF-8 bytes of `content` using fixed
/// constants only, so the same input always yields the same string.
#[must_use]
pub fn stable_hash_hex(content: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a step: XOR the byte in first, then multiply by the prime (wrapping).
    let digest = content.bytes().fold(FNV_OFFSET_BASIS, |state, byte| {
        (state ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });

    format!("{digest:016x}")
}
94
#[cfg(test)]
mod tests {
    use index_core::{IndexDocument, IndexNode};

    use super::{TransformCacheKey, TransformedDocumentCache, stable_hash_hex};

    /// Builds a document titled `title` that holds a single "cached" paragraph.
    fn document(title: &str) -> IndexDocument {
        let mut fixture = IndexDocument::titled(title);
        fixture.push(IndexNode::Paragraph(String::from("cached")));
        fixture
    }

    /// A document inserted under a key can be read back under the same key.
    #[test]
    fn cache_roundtrips_transformed_documents() {
        let mut cache = TransformedDocumentCache::new();
        let key = TransformCacheKey::new(Some("https://example.org"), "<p>hello</p>");
        cache.insert(key.clone(), document("Cached"));

        let hit = cache.get(&key);

        assert_eq!(cache.len(), 1);
        assert!(hit.is_some_and(|doc| doc.title == "Cached"));
    }

    /// Changing either the content or the URL must yield a different key.
    #[test]
    fn cache_key_invalidates_on_source_content_or_url_change() {
        let reference = TransformCacheKey::new(Some("https://example.org/a"), "<p>hello</p>");
        let variants = [
            TransformCacheKey::new(Some("https://example.org/a"), "<p>bye</p>"),
            TransformCacheKey::new(Some("https://example.org/b"), "<p>hello</p>"),
        ];

        for variant in variants {
            assert_ne!(reference, variant);
        }
    }

    /// Changing either version tag must yield a different key.
    #[test]
    fn cache_key_invalidates_on_adapter_or_transformer_version_change() {
        let reference = TransformCacheKey::new(Some("https://example.org"), "content");

        let bumped_adapter = TransformCacheKey {
            adapter_version: String::from("adapters-v-next"),
            ..reference.clone()
        };
        let bumped_transformer = TransformCacheKey {
            transformer_version: String::from("transformer-v-next"),
            ..reference.clone()
        };

        assert_ne!(reference, bumped_adapter);
        assert_ne!(reference, bumped_transformer);
    }

    /// Equal inputs hash equally; inputs differing only in case do not.
    #[test]
    fn stable_hash_is_deterministic() {
        let first = stable_hash_hex("Index");
        let second = stable_hash_hex("Index");
        let lowercase = stable_hash_hex("index");

        assert_eq!(first, second);
        assert_ne!(first, lowercase);
    }
}