1use std::collections::HashMap;
14use std::path::{Path, PathBuf};
15use std::time::{SystemTime, UNIX_EPOCH};
16
17use super::agents::AgentRegistry;
18use super::file_table::{fnv1a64, FileId, IndexedFile, IndexedSymbol};
19use super::graph::DepGraph;
20use super::imports;
21use super::overlay::OverlayState;
22use super::symbol_graph::SymbolGraph;
23use super::trigram::TrigramIndex;
24use super::versions::VersionLog;
25use super::walker::{is_indexable_file, language_for_extension, walk_indexable, MAX_FILE_BYTES};
26use super::words::WordIndex;
27
28use crate::ast::{Language as AstLanguage, Symbol as AstSymbol};
29
30pub struct IndexState {
34 pub root: PathBuf,
36 pub files: HashMap<FileId, IndexedFile>,
38 pub path_to_id: HashMap<String, FileId>,
40 pub trigrams: TrigramIndex,
42 pub words: WordIndex,
44 pub deps: DepGraph,
46 pub versions: VersionLog,
48 pub agents: AgentRegistry,
50 pub symbols: SymbolGraph,
52 pub overlays: OverlayState,
54 pub last_built_unix_ms: i64,
56 pub git_head: Option<String>,
58 next_id: FileId,
59}
60
/// Counters reported by [`IndexState::build_from_root`].
#[derive(Debug, Default)]
pub struct BuildOutcome {
    /// Number of walked files that were accepted into the index.
    pub files_indexed: u64,
    /// Number of walked files that were rejected during ingestion.
    pub files_skipped: u64,
}
70
71impl IndexState {
72 pub fn build_from_root(root: &Path) -> (Self, BuildOutcome) {
75 let canonical_root = canonicalize(root);
76 let mut state = IndexState {
77 root: canonical_root.clone(),
78 files: HashMap::new(),
79 path_to_id: HashMap::new(),
80 trigrams: TrigramIndex::new(),
81 words: WordIndex::new(),
82 deps: DepGraph::new(),
83 versions: VersionLog::new(),
84 agents: AgentRegistry::new(),
85 symbols: SymbolGraph::new(),
86 overlays: OverlayState::new(),
87 last_built_unix_ms: now_unix_ms(),
88 git_head: read_git_head(&canonical_root),
89 next_id: 1,
90 };
91 let mut outcome = BuildOutcome::default();
92 let mut to_resolve: Vec<(FileId, String)> = Vec::new();
93 walk_indexable(&canonical_root, |abs| match state.ingest(abs) {
94 Some(file_id) => {
95 outcome.files_indexed += 1;
96 if let Some(file) = state.files.get(&file_id) {
97 to_resolve.push((file_id, file.relative_path.clone()));
98 }
99 }
100 None => {
101 outcome.files_skipped += 1;
102 }
103 });
104 for (id, rel) in to_resolve {
105 state.rebuild_deps(id, &rel);
106 state.rebuild_symbol_graph_for(id);
107 }
108 state.link_symbol_imports();
110 (state, outcome)
111 }
112
113 pub fn reindex_file(&mut self, abs: &Path) -> Option<FileId> {
118 if !abs.exists() {
119 self.remove_file_path(abs);
120 return None;
121 }
122 if !is_indexable_file(abs) || super::walker::is_sensitive_path(abs) {
123 self.remove_file_path(abs);
124 return None;
125 }
126 let id = self.ingest(abs)?;
127 let rel = self
128 .files
129 .get(&id)
130 .map(|f| f.relative_path.clone())
131 .unwrap_or_default();
132 if !rel.is_empty() {
133 self.rebuild_deps(id, &rel);
134 self.rebuild_symbol_graph_for(id);
135 self.link_symbol_imports();
136 }
137 Some(id)
138 }
139
140 pub fn remove_file_path(&mut self, abs: &Path) {
143 let Some(rel) = relative_path(&self.root, abs) else {
144 return;
145 };
146 let Some(id) = self.path_to_id.remove(&rel) else {
147 return;
148 };
149 self.files.remove(&id);
150 self.trigrams.remove_file(id);
151 self.words.remove_file(id);
152 self.deps.remove_file(id);
153 self.symbols.remove_file(id);
154 }
155
156 fn ingest(&mut self, abs: &Path) -> Option<FileId> {
157 if !is_indexable_file(abs) {
158 return None;
159 }
160 let metadata = std::fs::metadata(abs).ok()?;
161 if metadata.len() > MAX_FILE_BYTES {
162 return None;
163 }
164 let content = std::fs::read_to_string(abs).ok()?;
165 if content.len() > MAX_FILE_BYTES as usize {
166 return None;
167 }
168 let rel = relative_path(&self.root, abs)?;
169 let hash = fnv1a64(content.as_bytes());
170 let id = match self.path_to_id.get(&rel) {
171 Some(existing_id) => {
172 if let Some(file) = self.files.get(existing_id) {
173 if file.content_hash == hash {
174 return Some(*existing_id);
175 }
176 }
177 *existing_id
178 }
179 None => {
180 let id = self.next_id;
181 self.next_id = self.next_id.checked_add(1).expect("FileId overflow");
182 self.path_to_id.insert(rel.clone(), id);
183 id
184 }
185 };
186
187 let ext = abs
188 .extension()
189 .and_then(|s| s.to_str())
190 .unwrap_or("")
191 .to_ascii_lowercase();
192 let language = language_for_extension(&ext).to_string();
193 let imports = imports::extract_imports(&content, &language);
194 let mtime_ms = metadata
195 .modified()
196 .ok()
197 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
198 .map(|d| d.as_millis() as i64)
199 .unwrap_or(0);
200 let line_count = if content.is_empty() {
201 0
202 } else {
203 content.split('\n').count() as u32
204 };
205
206 let file = IndexedFile {
207 id,
208 relative_path: rel,
209 language,
210 size_bytes: content.len() as u64,
211 line_count,
212 content_hash: hash,
213 mtime_ms,
214 symbols: Vec::new(),
215 imports,
216 };
217 self.trigrams.index_file(id, &content);
218 self.words.index_file(id, &content);
219 self.files.insert(id, file);
220 Some(id)
221 }
222
223 fn rebuild_deps(&mut self, id: FileId, relative_path: &str) {
224 let Some(file) = self.files.get(&id).cloned() else {
225 return;
226 };
227 let resolved = imports::resolve(
228 &file.imports,
229 relative_path,
230 &file.language,
231 &self.path_to_id,
232 );
233 self.deps
234 .set_edges(id, resolved.resolved, resolved.unresolved);
235 }
236
237 pub(super) fn rebuild_symbol_graph_for(&mut self, id: FileId) {
247 let Some(file) = self.files.get(&id).cloned() else {
248 return;
249 };
250 let abs = self.root.join(&file.relative_path);
251 let Ok(source) = std::fs::read_to_string(&abs) else {
252 return;
253 };
254 let Some(language) = AstLanguage::detect(std::path::Path::new(&file.relative_path), None)
255 else {
256 return;
257 };
258 let outcome =
259 self.symbols
260 .rebuild_file(id, &file.relative_path, language, &source, &file.imports);
261 if let Some(file_mut) = self.files.get_mut(&id) {
262 file_mut.symbols = outcome
263 .symbols
264 .iter()
265 .map(indexed_symbol_from_ast)
266 .collect();
267 }
268 }
269
270 pub(super) fn link_symbol_imports(&mut self) {
275 let mut resolved: HashMap<FileId, Vec<FileId>> = HashMap::new();
276 for id in self.files.keys() {
277 resolved.insert(*id, self.deps.imports_of(*id));
278 }
279 self.symbols.link_imports(&resolved);
280 }
281
282 pub fn lookup_path(&self, raw: &str) -> Option<FileId> {
285 if let Some(id) = self.path_to_id.get(raw) {
286 return Some(*id);
287 }
288 let path = Path::new(raw);
289 if path.is_absolute() {
290 if let Some(rel) = relative_path(&self.root, path) {
291 if let Some(id) = self.path_to_id.get(&rel) {
292 return Some(*id);
293 }
294 }
295 }
296 None
297 }
298
299 pub fn estimated_bytes(&self) -> usize {
302 let file_bytes: usize = self
303 .files
304 .values()
305 .map(|f| f.relative_path.len() + f.imports.iter().map(|s| s.len()).sum::<usize>() + 64)
306 .sum();
307 self.trigrams.estimated_bytes() + self.words.estimated_bytes() + file_bytes
308 }
309
310 pub fn absolute_path(&self, rel_or_abs: &str) -> Option<PathBuf> {
314 let p = Path::new(rel_or_abs);
315 let candidate = if p.is_absolute() {
316 p.to_path_buf()
317 } else {
318 self.root.join(p)
319 };
320 let canonical = canonicalize_existing(&candidate);
321 if canonical.strip_prefix(&self.root).is_ok() {
322 Some(canonical)
323 } else {
324 None
325 }
326 }
327
328 pub(crate) fn empty(root: PathBuf) -> Self {
331 Self {
332 root,
333 files: HashMap::new(),
334 path_to_id: HashMap::new(),
335 trigrams: TrigramIndex::new(),
336 words: WordIndex::new(),
337 deps: DepGraph::new(),
338 versions: VersionLog::new(),
339 agents: AgentRegistry::new(),
340 symbols: SymbolGraph::new(),
341 overlays: OverlayState::new(),
342 last_built_unix_ms: 0,
343 git_head: None,
344 next_id: 1,
345 }
346 }
347
348 pub(crate) fn next_file_id_internal(&self) -> FileId {
350 self.next_id
351 }
352
353 pub(crate) fn set_next_file_id(&mut self, id: FileId) {
355 self.next_id = id.max(1);
356 }
357}
358
359fn indexed_symbol_from_ast(sym: &AstSymbol) -> IndexedSymbol {
364 IndexedSymbol {
365 name: sym.name.clone(),
366 kind: sym.kind.as_str().to_string(),
367 start_line: sym.start_row.saturating_add(1),
368 end_line: sym.end_row.saturating_add(1),
369 signature: sym.signature.clone(),
370 }
371}
372
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Returns `0` if the system clock reports a time before the epoch.
pub(crate) fn now_unix_ms() -> i64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
381
/// Canonicalizes `root`, falling back to the path as given when the
/// filesystem cannot resolve it.
fn canonicalize(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
385
386pub(crate) fn relative_path(root: &Path, abs: &Path) -> Option<String> {
392 let canonical_abs = canonicalize_existing(abs);
393 let stripped = canonical_abs.strip_prefix(root).ok()?;
394 Some(stripped.to_string_lossy().replace('\\', "/"))
395}
396
/// Canonicalizes `abs` even when its tail does not exist on disk.
///
/// If the whole path can be canonicalized, that result is returned.
/// Otherwise the path is walked upwards until an existing ancestor is found;
/// that ancestor is canonicalized and the missing tail is re-applied on top
/// of it. `..` components inside the missing tail are applied lexically to
/// the canonical prefix (which contains no symlinks), and the normalized
/// path is then resolved once more so that any existing part of it,
/// including symlinks, is followed. This keeps an unresolved `..` from
/// surviving into the result, where a lexical prefix check would
/// misinterpret it.
///
/// When no existing ancestor can be canonicalized the input is returned
/// unchanged.
fn canonicalize_existing(abs: &Path) -> PathBuf {
    if let Ok(resolved) = std::fs::canonicalize(abs) {
        return resolved;
    }

    // Components below the deepest existing ancestor, innermost first.
    let mut tail: Vec<std::path::Component<'_>> = Vec::new();
    let mut cursor = abs;
    loop {
        if cursor.exists() {
            let Ok(mut out) = std::fs::canonicalize(cursor) else {
                break;
            };
            let mut popped = false;
            for piece in tail.iter().rev() {
                match piece {
                    std::path::Component::ParentDir => {
                        // Popping at the filesystem root is a no-op, which
                        // matches how the OS treats `/..`.
                        out.pop();
                        popped = true;
                    }
                    other => out.push(other.as_os_str()),
                }
            }
            // After a lexical `..` the path may name something that exists
            // (possibly a symlink); `out` is `..`-free now, so resolving it
            // again cannot recurse further.
            return if popped {
                canonicalize_existing(&out)
            } else {
                out
            };
        }
        match (cursor.parent(), cursor.components().next_back()) {
            (
                Some(parent),
                Some(last @ (std::path::Component::Normal(_) | std::path::Component::ParentDir)),
            ) if !parent.as_os_str().is_empty() => {
                tail.push(last);
                cursor = parent;
            }
            _ => break,
        }
    }
    abs.to_path_buf()
}
426
/// Reads the current git head of the workspace, if there is one.
///
/// Returns `None` when `.git/HEAD` cannot be read. For a detached head the
/// trimmed content of `HEAD` is returned. For a symbolic head
/// (`ref: <name>`) the referenced commit id is looked up first as a loose
/// ref file under `.git/` and then in `.git/packed-refs`, where git stores
/// refs after packing them. If neither has an entry (for example a branch
/// with no commits yet) the trimmed `ref: ...` line itself is returned.
fn read_git_head(workspace_root: &Path) -> Option<String> {
    let git_dir = workspace_root.join(".git");
    let txt = std::fs::read_to_string(git_dir.join("HEAD")).ok()?;
    let line = txt.trim();

    if let Some(ref_target) = line.strip_prefix("ref: ") {
        // Loose ref: one file per ref containing the commit id.
        if let Ok(sha) = std::fs::read_to_string(git_dir.join(ref_target)) {
            return Some(sha.trim().to_string());
        }
        // Packed ref: lines of `<sha> <refname>`; `#` starts a header line
        // and `^` a peeled-tag line, neither of which names a ref.
        if let Ok(packed) = std::fs::read_to_string(git_dir.join("packed-refs")) {
            let packed_sha = packed
                .lines()
                .filter(|entry| !entry.starts_with('#') && !entry.starts_with('^'))
                .filter_map(|entry| entry.split_once(' '))
                .find(|(_, name)| name.trim() == ref_target)
                .map(|(sha, _)| sha.trim().to_string());
            if packed_sha.is_some() {
                return packed_sha;
            }
        }
    }
    Some(line.to_string())
}
439
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Writes `contents` to `root/rel`, creating parent directories first.
    fn seed(root: &Path, rel: &str, contents: &str) {
        let target = root.join(rel);
        fs::create_dir_all(target.parent().unwrap()).unwrap();
        fs::write(target, contents).unwrap();
    }

    #[test]
    fn build_indexes_files_and_resolves_imports() {
        let workspace = tempdir().unwrap();
        seed(
            workspace.path(),
            "src/main.rs",
            "use crate::util::helper;\nfn main() {}\n",
        );
        seed(workspace.path(), "src/util.rs", "pub fn helper() {}");

        let (state, outcome) = IndexState::build_from_root(workspace.path());

        assert_eq!(outcome.files_indexed, 2);
        assert_eq!(state.files.len(), 2);
        let main_id = state.path_to_id["src/main.rs"];
        // Indexing panics if the util file is missing from the path table.
        let _util_id = state.path_to_id["src/util.rs"];
        // The `crate::` import is expected to produce no resolved edge.
        assert_eq!(state.deps.imports_of(main_id), Vec::<FileId>::new());
    }

    #[test]
    fn typescript_imports_get_resolved() {
        let workspace = tempdir().unwrap();
        seed(
            workspace.path(),
            "src/index.ts",
            "import { helper } from \"./util\";\n",
        );
        seed(
            workspace.path(),
            "src/util.ts",
            "export function helper() {}",
        );

        let (state, _) = IndexState::build_from_root(workspace.path());

        let importer = state.path_to_id["src/index.ts"];
        let imported = state.path_to_id["src/util.ts"];
        assert_eq!(state.deps.imports_of(importer), vec![imported]);
        assert_eq!(state.deps.importers_of(imported), vec![importer]);
    }

    #[test]
    fn lookup_path_handles_absolute_paths() {
        let workspace = tempdir().unwrap();
        seed(workspace.path(), "a/b/c.py", "x = 1\n");

        let (state, _) = IndexState::build_from_root(workspace.path());

        let absolute = workspace.path().join("a/b/c.py");
        let found = state.lookup_path(absolute.to_str().unwrap()).unwrap();
        assert_eq!(state.path_to_id["a/b/c.py"], found);
    }

    #[test]
    fn reindex_file_picks_up_changes_in_place() {
        let workspace = tempdir().unwrap();
        let target = workspace.path().join("src/a.ts");
        seed(workspace.path(), "src/a.ts", "export const x = 1;\n");

        let (mut state, _) = IndexState::build_from_root(workspace.path());
        let id = state.path_to_id["src/a.ts"];
        let before_hash = state.files[&id].content_hash;

        fs::write(&target, "export const x = 2;\n").unwrap();
        let new_id = state.reindex_file(&target).unwrap();

        assert_eq!(new_id, id, "file id should be stable across reindex");
        let after_hash = state.files[&id].content_hash;
        assert_ne!(before_hash, after_hash);
    }

    #[test]
    fn reindex_file_removes_entry_when_path_disappears() {
        let workspace = tempdir().unwrap();
        let target = workspace.path().join("src/a.ts");
        seed(workspace.path(), "src/a.ts", "export const x = 1;\n");

        let (mut state, _) = IndexState::build_from_root(workspace.path());
        assert!(state.path_to_id.contains_key("src/a.ts"));

        fs::remove_file(&target).unwrap();
        let result = state.reindex_file(&target);

        assert!(result.is_none());
        assert!(!state.path_to_id.contains_key("src/a.ts"));
    }
}