// context_core/cache/invalidation.rs
use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};

use chrono::Utc;
use sha2::{Digest, Sha256};
use thiserror::Error;

use crate::cache::cache::ContextCache;
use crate::cache::versioning::{CacheBuildConfig, CacheIndex, CacheManifest, ManifestDocumentEntry};
use crate::document::Document;
12
13#[derive(Debug, Error)]
14pub enum CacheBuildError {
15 #[error("IO error: {0}")]
16 Io(#[from] std::io::Error),
17 #[error("Serialization error: {0}")]
18 Serialization(#[from] serde_json::Error),
19 #[error("Output directory already exists: {0}")]
20 OutputExists(PathBuf),
21 #[error("Filename collision detected for hash fragment: {0}")]
22 FilenameCollision(String),
23 #[error("Duplicate document ID: {0}")]
24 DuplicateDocumentId(String),
25 #[error("Invalid version format: {0}")]
26 InvalidVersionFormat(String),
27}
28
29pub struct CacheBuilder {
31 config: CacheBuildConfig,
32}
33
34impl CacheBuilder {
35 pub fn new(config: CacheBuildConfig) -> Self {
36 Self { config }
37 }
38
39 pub fn build(
40 &self,
41 documents: Vec<Document>,
42 output_dir: &Path,
43 ) -> Result<ContextCache, CacheBuildError> {
44 if output_dir.exists() {
45 return Err(CacheBuildError::OutputExists(output_dir.to_path_buf()));
46 }
47
48 let mut sorted_docs = documents;
50 sorted_docs.sort_by(|a, b| a.id.cmp(&b.id));
51
52 for pair in sorted_docs.windows(2) {
54 if pair[0].id == pair[1].id {
55 return Err(CacheBuildError::DuplicateDocumentId(
56 pair[0].id.as_str().to_string(),
57 ));
58 }
59 }
60
61 let mut doc_contexts = Vec::with_capacity(sorted_docs.len());
64 let mut index_entries = BTreeMap::new();
65 let mut seen_filenames = BTreeSet::new();
66
67 let mut version_hasher = Sha256::new();
70
71 let config_json = serde_json::to_vec(&self.config)?;
73 version_hasher.update(&config_json);
74
75 for doc in &sorted_docs {
76 let line = format!("{}:{}", doc.id.as_str(), doc.version.as_str());
78 version_hasher.update(line.as_bytes());
79
80 let full_hash = doc
82 .version
83 .as_str()
84 .strip_prefix("sha256:")
85 .ok_or_else(|| CacheBuildError::InvalidVersionFormat(doc.version.as_str().to_string()))?;
86
87 if full_hash.len() < 12 {
88 return Err(CacheBuildError::FilenameCollision(full_hash.to_string()));
90 }
91 let filename_stem = &full_hash[..12];
92 let filename = format!("{}.json", filename_stem);
93
94 if seen_filenames.contains(filename_stem) {
96 return Err(CacheBuildError::FilenameCollision(filename_stem.to_string()));
97 }
98 seen_filenames.insert(filename_stem.to_string());
99
100 let relative_path = format!("documents/{}", filename);
102
103 let entry = ManifestDocumentEntry {
104 id: doc.id.clone(),
105 version: doc.version.clone(),
106 file: relative_path.clone(),
107 };
108
109 index_entries.insert(doc.id.clone(), relative_path);
110 doc_contexts.push((doc, entry));
111 }
112
113 let hash_bytes = version_hasher.finalize();
114 let cache_version = format!("sha256:{}", hex::encode(hash_bytes));
115
116 let mut manifest_documents: Vec<ManifestDocumentEntry> = doc_contexts
119 .iter()
120 .map(|(_, entry)| entry.clone())
121 .collect();
122
123 manifest_documents.sort_by(|a, b| a.id.cmp(&b.id));
125
126 let manifest = CacheManifest {
128 cache_version: cache_version.clone(),
129 build_config: self.config.clone(),
130 created_at: Utc::now(),
131 document_count: sorted_docs.len(),
132 documents: manifest_documents,
133 };
134
135 let index = CacheIndex::new(index_entries);
136
137 let temp_suffix = format!("tmp.{}", &cache_version[7..19]);
142 let temp_dir = output_dir.with_extension(temp_suffix);
143
144 if temp_dir.exists() {
146 fs::remove_dir_all(&temp_dir)?;
147 }
148 fs::create_dir_all(&temp_dir)?;
149 fs::create_dir(temp_dir.join("documents"))?;
150
151 for (doc, entry) in doc_contexts {
154 let path = temp_dir.join(&entry.file); let f = fs::File::create(path)?;
156 serde_json::to_writer(&f, doc)?;
157 f.sync_all()?;
158 }
159
160 let index_path = temp_dir.join("index.json");
162 let f_idx = fs::File::create(index_path)?;
163 serde_json::to_writer_pretty(&f_idx, &index)?;
165 f_idx.sync_all()?;
166
167 let manifest_path = temp_dir.join("manifest.json");
169 let f_man = fs::File::create(manifest_path)?;
170 serde_json::to_writer_pretty(&f_man, &manifest)?;
171 f_man.sync_all()?;
172
173 fs::rename(&temp_dir, output_dir)?;
175
176 Ok(ContextCache {
177 root: output_dir.to_path_buf(),
178 manifest,
179 })
180 }
181}