1use std::collections::HashMap;
14use std::path::{Path, PathBuf};
15use std::time::{SystemTime, UNIX_EPOCH};
16
17use super::agents::AgentRegistry;
18use super::file_table::{fnv1a64, FileId, IndexedFile};
19use super::graph::DepGraph;
20use super::imports;
21use super::trigram::TrigramIndex;
22use super::versions::VersionLog;
23use super::walker::{is_indexable_file, language_for_extension, walk_indexable, MAX_FILE_BYTES};
24use super::words::WordIndex;
25
26pub struct IndexState {
30 pub root: PathBuf,
32 pub files: HashMap<FileId, IndexedFile>,
34 pub path_to_id: HashMap<String, FileId>,
36 pub trigrams: TrigramIndex,
38 pub words: WordIndex,
40 pub deps: DepGraph,
42 pub versions: VersionLog,
44 pub agents: AgentRegistry,
46 pub last_built_unix_ms: i64,
48 pub git_head: Option<String>,
50 next_id: FileId,
51}
52
/// Tally produced by a full index build.
///
/// Both counters start at zero (`Default`) and are bumped once per path the
/// walker hands to the build. The type is plain data, so it also derives
/// `Clone`, `PartialEq` and `Eq` to make outcomes easy to copy and compare.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct BuildOutcome {
    /// Number of visited paths that were ingested into the index.
    pub files_indexed: u64,
    /// Number of visited paths for which ingestion returned nothing.
    pub files_skipped: u64,
}
62
63impl IndexState {
64 pub fn build_from_root(root: &Path) -> (Self, BuildOutcome) {
67 let canonical_root = canonicalize(root);
68 let mut state = IndexState {
69 root: canonical_root.clone(),
70 files: HashMap::new(),
71 path_to_id: HashMap::new(),
72 trigrams: TrigramIndex::new(),
73 words: WordIndex::new(),
74 deps: DepGraph::new(),
75 versions: VersionLog::new(),
76 agents: AgentRegistry::new(),
77 last_built_unix_ms: now_unix_ms(),
78 git_head: read_git_head(&canonical_root),
79 next_id: 1,
80 };
81 let mut outcome = BuildOutcome::default();
82 let mut to_resolve: Vec<(FileId, String)> = Vec::new();
83 walk_indexable(&canonical_root, |abs| match state.ingest(abs) {
84 Some(file_id) => {
85 outcome.files_indexed += 1;
86 if let Some(file) = state.files.get(&file_id) {
87 to_resolve.push((file_id, file.relative_path.clone()));
88 }
89 }
90 None => {
91 outcome.files_skipped += 1;
92 }
93 });
94 for (id, rel) in to_resolve {
95 state.rebuild_deps(id, &rel);
96 }
97 (state, outcome)
98 }
99
100 pub fn reindex_file(&mut self, abs: &Path) -> Option<FileId> {
105 if !abs.exists() {
106 self.remove_file_path(abs);
107 return None;
108 }
109 if !is_indexable_file(abs) || super::walker::is_sensitive_path(abs) {
110 self.remove_file_path(abs);
111 return None;
112 }
113 let id = self.ingest(abs)?;
114 let rel = self
115 .files
116 .get(&id)
117 .map(|f| f.relative_path.clone())
118 .unwrap_or_default();
119 if !rel.is_empty() {
120 self.rebuild_deps(id, &rel);
121 }
122 Some(id)
123 }
124
125 pub fn remove_file_path(&mut self, abs: &Path) {
128 let Some(rel) = relative_path(&self.root, abs) else {
129 return;
130 };
131 let Some(id) = self.path_to_id.remove(&rel) else {
132 return;
133 };
134 self.files.remove(&id);
135 self.trigrams.remove_file(id);
136 self.words.remove_file(id);
137 self.deps.remove_file(id);
138 }
139
140 fn ingest(&mut self, abs: &Path) -> Option<FileId> {
141 if !is_indexable_file(abs) {
142 return None;
143 }
144 let metadata = std::fs::metadata(abs).ok()?;
145 if metadata.len() > MAX_FILE_BYTES {
146 return None;
147 }
148 let content = std::fs::read_to_string(abs).ok()?;
149 if content.len() > MAX_FILE_BYTES as usize {
150 return None;
151 }
152 let rel = relative_path(&self.root, abs)?;
153 let hash = fnv1a64(content.as_bytes());
154 let id = match self.path_to_id.get(&rel) {
155 Some(existing_id) => {
156 if let Some(file) = self.files.get(existing_id) {
157 if file.content_hash == hash {
158 return Some(*existing_id);
159 }
160 }
161 *existing_id
162 }
163 None => {
164 let id = self.next_id;
165 self.next_id = self.next_id.checked_add(1).expect("FileId overflow");
166 self.path_to_id.insert(rel.clone(), id);
167 id
168 }
169 };
170
171 let ext = abs
172 .extension()
173 .and_then(|s| s.to_str())
174 .unwrap_or("")
175 .to_ascii_lowercase();
176 let language = language_for_extension(&ext).to_string();
177 let imports = imports::extract_imports(&content, &language);
178 let mtime_ms = metadata
179 .modified()
180 .ok()
181 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
182 .map(|d| d.as_millis() as i64)
183 .unwrap_or(0);
184 let line_count = if content.is_empty() {
185 0
186 } else {
187 content.split('\n').count() as u32
188 };
189
190 let file = IndexedFile {
191 id,
192 relative_path: rel,
193 language,
194 size_bytes: content.len() as u64,
195 line_count,
196 content_hash: hash,
197 mtime_ms,
198 symbols: Vec::new(),
199 imports,
200 };
201 self.trigrams.index_file(id, &content);
202 self.words.index_file(id, &content);
203 self.files.insert(id, file);
204 Some(id)
205 }
206
207 fn rebuild_deps(&mut self, id: FileId, relative_path: &str) {
208 let Some(file) = self.files.get(&id).cloned() else {
209 return;
210 };
211 let resolved = imports::resolve(
212 &file.imports,
213 relative_path,
214 &file.language,
215 &self.path_to_id,
216 );
217 self.deps
218 .set_edges(id, resolved.resolved, resolved.unresolved);
219 }
220
221 pub fn lookup_path(&self, raw: &str) -> Option<FileId> {
224 if let Some(id) = self.path_to_id.get(raw) {
225 return Some(*id);
226 }
227 let path = Path::new(raw);
228 if path.is_absolute() {
229 if let Some(rel) = relative_path(&self.root, path) {
230 if let Some(id) = self.path_to_id.get(&rel) {
231 return Some(*id);
232 }
233 }
234 }
235 None
236 }
237
238 pub fn estimated_bytes(&self) -> usize {
241 let file_bytes: usize = self
242 .files
243 .values()
244 .map(|f| f.relative_path.len() + f.imports.iter().map(|s| s.len()).sum::<usize>() + 64)
245 .sum();
246 self.trigrams.estimated_bytes() + self.words.estimated_bytes() + file_bytes
247 }
248
249 pub fn absolute_path(&self, rel_or_abs: &str) -> PathBuf {
253 let p = Path::new(rel_or_abs);
254 if p.is_absolute() {
255 p.to_path_buf()
256 } else {
257 self.root.join(p)
258 }
259 }
260
261 pub(crate) fn empty(root: PathBuf) -> Self {
264 Self {
265 root,
266 files: HashMap::new(),
267 path_to_id: HashMap::new(),
268 trigrams: TrigramIndex::new(),
269 words: WordIndex::new(),
270 deps: DepGraph::new(),
271 versions: VersionLog::new(),
272 agents: AgentRegistry::new(),
273 last_built_unix_ms: 0,
274 git_head: None,
275 next_id: 1,
276 }
277 }
278
279 pub(crate) fn next_file_id_internal(&self) -> FileId {
281 self.next_id
282 }
283
284 pub(crate) fn set_next_file_id(&mut self, id: FileId) {
286 self.next_id = id.max(1);
287 }
288}
289
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Returns `0` when the system clock reports a time before the epoch.
pub(crate) fn now_unix_ms() -> i64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
298
/// Canonicalizes `root`, returning the path unchanged when the filesystem
/// cannot resolve it (for example because it does not exist).
fn canonicalize(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
302
303pub(crate) fn relative_path(root: &Path, abs: &Path) -> Option<String> {
309 let canonical_abs = canonicalize_existing(abs);
310 let stripped = canonical_abs.strip_prefix(root).ok()?;
311 Some(stripped.to_string_lossy().replace('\\', "/"))
312}
313
/// Canonicalizes `abs` even when its tail does not exist on disk.
///
/// If the whole path resolves, that result is returned. Otherwise the path
/// is walked upwards until an existing ancestor is found; that ancestor is
/// canonicalized and the missing trailing components are appended to it
/// unchanged. When no usable ancestor is found, or the ancestor cannot be
/// canonicalized, `abs` is returned as given.
fn canonicalize_existing(abs: &Path) -> PathBuf {
    if let Ok(resolved) = std::fs::canonicalize(abs) {
        return resolved;
    }

    // Components that do not exist yet, collected innermost-first.
    let mut missing: Vec<&std::ffi::OsStr> = Vec::new();
    let mut probe = abs;
    while !probe.exists() {
        match (probe.parent(), probe.file_name()) {
            (Some(parent), Some(name)) if !parent.as_os_str().is_empty() => {
                missing.push(name);
                probe = parent;
            }
            // Ran out of ancestors, or hit a component without a file name.
            _ => return abs.to_path_buf(),
        }
    }

    match std::fs::canonicalize(probe) {
        Ok(mut resolved) => {
            for name in missing.iter().rev() {
                resolved.push(name);
            }
            resolved
        }
        Err(_) => abs.to_path_buf(),
    }
}
343
/// Reads what `HEAD` of the repository at `workspace_root` points to.
///
/// Returns `None` when `.git/HEAD` cannot be read. For a detached head the
/// trimmed contents of `HEAD` are returned. For a symbolic head
/// (`ref: <name>`) the ref is resolved in this order:
///
/// 1. the loose ref file `.git/<name>`;
/// 2. the matching entry in `.git/packed-refs`, which is where the commit
///    id lives after refs have been packed and no loose file remains;
/// 3. failing both, the raw `ref: <name>` line itself.
fn read_git_head(workspace_root: &Path) -> Option<String> {
    let git_dir = workspace_root.join(".git");
    let head_text = std::fs::read_to_string(git_dir.join("HEAD")).ok()?;
    let line = head_text.trim();
    let Some(ref_target) = line.strip_prefix("ref: ") else {
        return Some(line.to_string());
    };

    if let Ok(sha) = std::fs::read_to_string(git_dir.join(ref_target)) {
        return Some(sha.trim().to_string());
    }

    if let Ok(packed) = std::fs::read_to_string(git_dir.join("packed-refs")) {
        // Entries are "<sha> <refname>"; '#' lines are headers and '^' lines
        // carry the peeled value of the preceding tag.
        let hit = packed
            .lines()
            .filter(|entry| !entry.starts_with('#') && !entry.starts_with('^'))
            .filter_map(|entry| entry.trim().split_once(' '))
            .find(|(_, name)| name.trim() == ref_target);
        if let Some((sha, _)) = hit {
            return Some(sha.to_string());
        }
    }

    Some(line.to_string())
}
356
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Writes `contents` to `root/rel`, creating parent directories first.
    fn write_file(root: &Path, rel: &str, contents: &str) {
        let target = root.join(rel);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(target, contents).unwrap();
    }

    /// Two Rust files are indexed; the `use crate::…` import in `main.rs`
    /// is asserted to yield no resolved edge.
    #[test]
    fn build_indexes_files_and_resolves_imports() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_file(
            root,
            "src/main.rs",
            "use crate::util::helper;\nfn main() {}\n",
        );
        write_file(root, "src/util.rs", "pub fn helper() {}");

        let (state, outcome) = IndexState::build_from_root(root);
        assert_eq!(outcome.files_indexed, 2);
        assert_eq!(state.files.len(), 2);
        let main_id = state.path_to_id["src/main.rs"];
        // Indexing panics if `util.rs` was not registered.
        let util_id = state.path_to_id["src/util.rs"];
        assert_eq!(state.deps.imports_of(main_id), Vec::<FileId>::new());
        let _ = util_id;
    }

    /// A relative TypeScript import produces an edge in both directions.
    #[test]
    fn typescript_imports_get_resolved() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_file(
            root,
            "src/index.ts",
            "import { helper } from \"./util\";\n",
        );
        write_file(root, "src/util.ts", "export function helper() {}");

        let (state, _) = IndexState::build_from_root(root);
        let index_id = state.path_to_id["src/index.ts"];
        let util_id = state.path_to_id["src/util.ts"];
        assert_eq!(state.deps.imports_of(index_id), vec![util_id]);
        assert_eq!(state.deps.importers_of(util_id), vec![index_id]);
    }

    /// An absolute path resolves to the same id as its relative key.
    #[test]
    fn lookup_path_handles_absolute_paths() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_file(root, "a/b/c.py", "x = 1\n");

        let (state, _) = IndexState::build_from_root(root);
        let absolute = root.join("a/b/c.py");
        let found = state.lookup_path(absolute.to_str().unwrap()).unwrap();
        assert_eq!(state.path_to_id["a/b/c.py"], found);
    }

    /// Rewriting a file keeps its id but changes its content hash.
    #[test]
    fn reindex_file_picks_up_changes_in_place() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_file(root, "src/a.ts", "export const x = 1;\n");
        let (mut state, _) = IndexState::build_from_root(root);
        let original_id = state.path_to_id["src/a.ts"];
        let hash_before = state.files[&original_id].content_hash;

        write_file(root, "src/a.ts", "export const x = 2;\n");
        let reindexed_id = state.reindex_file(&root.join("src/a.ts")).unwrap();
        assert_eq!(
            reindexed_id, original_id,
            "file id should be stable across reindex"
        );
        let hash_after = state.files[&original_id].content_hash;
        assert_ne!(hash_before, hash_after);
    }

    /// Reindexing a deleted file removes it from the path table.
    #[test]
    fn reindex_file_removes_entry_when_path_disappears() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_file(root, "src/a.ts", "export const x = 1;\n");
        let (mut state, _) = IndexState::build_from_root(root);
        assert!(state.path_to_id.contains_key("src/a.ts"));

        let target = root.join("src/a.ts");
        fs::remove_file(&target).unwrap();
        let outcome = state.reindex_file(&target);
        assert!(outcome.is_none());
        assert!(!state.path_to_id.contains_key("src/a.ts"));
    }
}