1use std::collections::HashMap;
14use std::path::{Path, PathBuf};
15
16use serde::{Deserialize, Serialize};
17
18use super::agents::{AgentRegistry, RegistryConfig, SerializedRegistry};
19use super::file_table::{FileId, IndexedFile, IndexedSymbol};
20use super::graph::DepGraph;
21use super::trigram::TrigramIndex;
22use super::versions::VersionLog;
23use super::words::WordIndex;
24use super::IndexState;
25
/// On-disk format version written into every snapshot's metadata.
///
/// [`CodeIndexSnapshot::load`] compares the stored `meta.format_version`
/// against this constant and returns `Ok(None)` when they differ.
pub const SNAPSHOT_FORMAT_VERSION: u32 = 1;
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct SnapshotMeta {
34 pub format_version: u32,
37 pub workspace_root: String,
39 pub git_head: Option<String>,
41 pub indexed_at_ms: i64,
43 pub file_count: usize,
45}
46
47#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct SnapshotSymbol {
50 pub name: String,
52 pub kind: String,
54 #[serde(default)]
56 pub access_level: Option<String>,
57 pub start_line: u32,
59 pub end_line: u32,
61 pub signature: String,
63}
64
65#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct SnapshotFile {
68 pub id: FileId,
70 pub relative_path: String,
72 pub language: String,
74 pub size_bytes: u64,
76 pub line_count: u32,
78 pub content_hash: u64,
80 pub mtime_ms: i64,
82 pub symbols: Vec<SnapshotSymbol>,
84 pub imports: Vec<String>,
86}
87
88#[derive(Debug, Clone, Serialize, Deserialize)]
90pub struct TrigramPosting {
91 pub trigram: u32,
93 pub files: Vec<FileId>,
95}
96
97#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct WordPosting {
100 pub word: String,
102 pub hits: Vec<(FileId, u32)>,
104}
105
106#[derive(Debug, Clone, Serialize, Deserialize)]
108pub struct DepRow {
109 pub from: FileId,
111 pub to: Vec<FileId>,
113 #[serde(default)]
115 pub unresolved: Vec<String>,
116}
117
118#[derive(Debug, Clone, Serialize, Deserialize)]
120pub struct CodeIndexSnapshot {
121 pub meta: SnapshotMeta,
123 pub next_file_id: FileId,
126 pub files: Vec<SnapshotFile>,
128 pub trigrams: Vec<TrigramPosting>,
130 pub words: Vec<WordPosting>,
132 pub deps: Vec<DepRow>,
134 pub versions: VersionLog,
136 pub agents: SerializedRegistry,
138}
139
140impl CodeIndexSnapshot {
141 pub fn path_for(workspace_root: &Path) -> PathBuf {
143 workspace_root
144 .join(".burin")
145 .join("index")
146 .join("snapshot.json")
147 }
148
149 pub fn save(&self, workspace_root: &Path) -> std::io::Result<()> {
152 let path = Self::path_for(workspace_root);
153 if let Some(parent) = path.parent() {
154 std::fs::create_dir_all(parent)?;
155 }
156 let tmp = path.with_extension("json.tmp");
157 let bytes = serde_json::to_vec(self).map_err(std::io::Error::other)?;
158 std::fs::write(&tmp, bytes)?;
159 std::fs::rename(&tmp, &path)?;
160 Ok(())
161 }
162
163 pub fn load(workspace_root: &Path) -> std::io::Result<Option<Self>> {
168 let path = Self::path_for(workspace_root);
169 if !path.exists() {
170 return Ok(None);
171 }
172 let bytes = std::fs::read(&path)?;
173 let snap: CodeIndexSnapshot =
174 serde_json::from_slice(&bytes).map_err(std::io::Error::other)?;
175 if snap.meta.format_version != SNAPSHOT_FORMAT_VERSION {
176 return Ok(None);
177 }
178 Ok(Some(snap))
179 }
180}
181
182impl IndexState {
183 pub fn snapshot(&self) -> CodeIndexSnapshot {
185 let files: Vec<SnapshotFile> = self
186 .files
187 .values()
188 .map(|f| SnapshotFile {
189 id: f.id,
190 relative_path: f.relative_path.clone(),
191 language: f.language.clone(),
192 size_bytes: f.size_bytes,
193 line_count: f.line_count,
194 content_hash: f.content_hash,
195 mtime_ms: f.mtime_ms,
196 symbols: f
197 .symbols
198 .iter()
199 .map(|s| SnapshotSymbol {
200 name: s.name.clone(),
201 kind: s.kind.clone(),
202 access_level: s.access_level.clone(),
203 start_line: s.start_line,
204 end_line: s.end_line,
205 signature: s.signature.clone(),
206 })
207 .collect(),
208 imports: f.imports.clone(),
209 })
210 .collect();
211
212 let trigrams = self.trigrams.snapshot_postings();
213 let words = self.words.snapshot_postings();
214 let deps = self.deps.snapshot_rows();
215
216 CodeIndexSnapshot {
217 meta: SnapshotMeta {
218 format_version: SNAPSHOT_FORMAT_VERSION,
219 workspace_root: self.root.to_string_lossy().into_owned(),
220 git_head: self.git_head.clone(),
221 indexed_at_ms: self.last_built_unix_ms,
222 file_count: self.files.len(),
223 },
224 next_file_id: self.next_file_id_internal(),
225 files,
226 trigrams,
227 words,
228 deps,
229 versions: self.versions.clone(),
230 agents: self.agents.snapshot(),
231 }
232 }
233
234 pub fn from_snapshot(snap: CodeIndexSnapshot) -> Self {
238 let root = PathBuf::from(snap.meta.workspace_root);
239 let mut files: HashMap<FileId, IndexedFile> = HashMap::with_capacity(snap.files.len());
240 let mut path_to_id: HashMap<String, FileId> = HashMap::with_capacity(snap.files.len());
241 for f in snap.files {
242 let indexed = IndexedFile {
243 id: f.id,
244 relative_path: f.relative_path.clone(),
245 language: f.language,
246 size_bytes: f.size_bytes,
247 line_count: f.line_count,
248 content_hash: f.content_hash,
249 mtime_ms: f.mtime_ms,
250 symbols: f
251 .symbols
252 .into_iter()
253 .map(|s| IndexedSymbol {
254 name: s.name,
255 kind: s.kind,
256 access_level: s.access_level,
257 start_line: s.start_line,
258 end_line: s.end_line,
259 signature: s.signature,
260 })
261 .collect(),
262 imports: f.imports,
263 };
264 path_to_id.insert(f.relative_path, f.id);
265 files.insert(f.id, indexed);
266 }
267 let trigrams = TrigramIndex::from_postings(snap.trigrams);
268 let words = WordIndex::from_postings(snap.words);
269 let deps = DepGraph::from_rows(snap.deps);
270 let agents = AgentRegistry::from_snapshot(RegistryConfig::default(), snap.agents);
271
272 let mut state = Self::empty(root);
273 state.files = files;
274 state.path_to_id = path_to_id;
275 state.trigrams = trigrams;
276 state.words = words;
277 state.deps = deps;
278 state.versions = snap.versions;
279 state.agents = agents;
280 state.last_built_unix_ms = snap.meta.indexed_at_ms;
281 state.git_head = snap.meta.git_head;
282 state.set_next_file_id(snap.next_file_id);
283 state
284 }
285
286 pub fn reap_after_recovery(&mut self, now_ms: i64) {
290 self.agents.reap(now_ms);
291 }
292}