1use std::collections::HashMap;
14use std::path::{Path, PathBuf};
15use std::time::{SystemTime, UNIX_EPOCH};
16
17use super::agents::AgentRegistry;
18use super::file_table::{fnv1a64, FileId, IndexedFile, IndexedSymbol};
19use super::graph::DepGraph;
20use super::imports;
21use super::overlay::OverlayState;
22use super::symbol_graph::SymbolGraph;
23use super::trigram::TrigramIndex;
24use super::versions::VersionLog;
25use super::walker::{is_indexable_file, language_for_extension, walk_indexable, MAX_FILE_BYTES};
26use super::words::WordIndex;
27
28use crate::ast::{Language as AstLanguage, Symbol as AstSymbol};
29
/// In-memory index of one workspace: the file table plus the search and
/// graph structures derived from it.
pub struct IndexState {
    /// Workspace root. `build_from_root` stores the canonicalized root here;
    /// `empty` stores whatever the caller passes.
    pub root: PathBuf,
    /// Indexed files keyed by their `FileId`.
    pub files: HashMap<FileId, IndexedFile>,
    /// Root-relative path (with `\` replaced by `/`) to the file's `FileId`.
    pub path_to_id: HashMap<String, FileId>,
    /// Trigram index; fed each file's content during ingestion.
    pub trigrams: TrigramIndex,
    /// Word index; fed each file's content during ingestion.
    pub words: WordIndex,
    /// File-level import edges, set per file by `rebuild_deps`.
    pub deps: DepGraph,
    /// Version log. Only constructed in this section — presumably maintained
    /// by other modules; confirm against its callers.
    pub versions: VersionLog,
    /// Agent registry. Only constructed in this section — presumably
    /// maintained by other modules; confirm against its callers.
    pub agents: AgentRegistry,
    /// Symbol graph, rebuilt per file and then linked across imports.
    pub symbols: SymbolGraph,
    /// Overlay state. Only constructed in this section — presumably
    /// maintained by other modules; confirm against its callers.
    pub overlays: OverlayState,
    /// Unix time in milliseconds taken when `build_from_root` created this
    /// state; `0` for a state created by `empty`.
    pub last_built_unix_ms: i64,
    /// Result of `read_git_head` at build time; `None` for a state created by
    /// `empty` or when `.git/HEAD` could not be read.
    pub git_head: Option<String>,
    /// Next `FileId` to hand out for a previously unseen path; starts at 1.
    next_id: FileId,
}
60
/// Counters reported by `IndexState::build_from_root`.
#[derive(Debug, Default)]
pub struct BuildOutcome {
    /// Number of walked paths for which ingestion returned a `FileId`.
    pub files_indexed: u64,
    /// Number of walked paths for which ingestion returned `None`.
    pub files_skipped: u64,
}
70
71impl IndexState {
72 pub fn build_from_root(root: &Path) -> (Self, BuildOutcome) {
75 let canonical_root = canonicalize(root);
76 let mut state = IndexState {
77 root: canonical_root.clone(),
78 files: HashMap::new(),
79 path_to_id: HashMap::new(),
80 trigrams: TrigramIndex::new(),
81 words: WordIndex::new(),
82 deps: DepGraph::new(),
83 versions: VersionLog::new(),
84 agents: AgentRegistry::new(),
85 symbols: SymbolGraph::new(),
86 overlays: OverlayState::new(),
87 last_built_unix_ms: now_unix_ms(),
88 git_head: read_git_head(&canonical_root),
89 next_id: 1,
90 };
91 let mut outcome = BuildOutcome::default();
92 let mut to_resolve: Vec<(FileId, String)> = Vec::new();
93 walk_indexable(&canonical_root, |abs| match state.ingest(abs) {
94 Some(file_id) => {
95 outcome.files_indexed += 1;
96 if let Some(file) = state.files.get(&file_id) {
97 to_resolve.push((file_id, file.relative_path.clone()));
98 }
99 }
100 None => {
101 outcome.files_skipped += 1;
102 }
103 });
104 for (id, rel) in to_resolve {
105 state.rebuild_deps(id, &rel);
106 state.rebuild_symbol_graph_for(id);
107 }
108 state.link_symbol_imports();
110 (state, outcome)
111 }
112
113 pub fn reindex_file(&mut self, abs: &Path) -> Option<FileId> {
118 if !abs.exists() {
119 self.remove_file_path(abs);
120 return None;
121 }
122 if !is_indexable_file(abs) || super::walker::is_sensitive_path(abs) {
123 self.remove_file_path(abs);
124 return None;
125 }
126 let id = self.ingest(abs)?;
127 let rel = self
128 .files
129 .get(&id)
130 .map(|f| f.relative_path.clone())
131 .unwrap_or_default();
132 if !rel.is_empty() {
133 self.rebuild_deps(id, &rel);
134 self.rebuild_symbol_graph_for(id);
135 self.link_symbol_imports();
136 }
137 Some(id)
138 }
139
140 pub fn remove_file_path(&mut self, abs: &Path) {
143 let Some(rel) = relative_path(&self.root, abs) else {
144 return;
145 };
146 let Some(id) = self.path_to_id.remove(&rel) else {
147 return;
148 };
149 self.files.remove(&id);
150 self.trigrams.remove_file(id);
151 self.words.remove_file(id);
152 self.deps.remove_file(id);
153 self.symbols.remove_file(id);
154 }
155
156 fn ingest(&mut self, abs: &Path) -> Option<FileId> {
157 if !is_indexable_file(abs) {
158 return None;
159 }
160 let metadata = std::fs::metadata(abs).ok()?;
161 if metadata.len() > MAX_FILE_BYTES {
162 return None;
163 }
164 let content = std::fs::read_to_string(abs).ok()?;
165 if content.len() > MAX_FILE_BYTES as usize {
166 return None;
167 }
168 let rel = relative_path(&self.root, abs)?;
169 let hash = fnv1a64(content.as_bytes());
170 let id = match self.path_to_id.get(&rel) {
171 Some(existing_id) => {
172 if let Some(file) = self.files.get(existing_id) {
173 if file.content_hash == hash {
174 return Some(*existing_id);
175 }
176 }
177 *existing_id
178 }
179 None => {
180 let id = self.next_id;
181 self.next_id = self.next_id.checked_add(1).expect("FileId overflow");
182 self.path_to_id.insert(rel.clone(), id);
183 id
184 }
185 };
186
187 let ext = abs
188 .extension()
189 .and_then(|s| s.to_str())
190 .unwrap_or("")
191 .to_ascii_lowercase();
192 let language = language_for_extension(&ext).to_string();
193 let imports = imports::extract_imports(&content, &language);
194 let mtime_ms = metadata
195 .modified()
196 .ok()
197 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
198 .map(|d| d.as_millis() as i64)
199 .unwrap_or(0);
200 let line_count = crate::text::count_lines(content.as_bytes()) as u32;
201
202 let file = IndexedFile {
203 id,
204 relative_path: rel,
205 language,
206 size_bytes: content.len() as u64,
207 line_count,
208 content_hash: hash,
209 mtime_ms,
210 symbols: Vec::new(),
211 imports,
212 };
213 self.trigrams.index_file(id, &content);
214 self.words.index_file(id, &content);
215 self.files.insert(id, file);
216 Some(id)
217 }
218
219 fn rebuild_deps(&mut self, id: FileId, relative_path: &str) {
220 let Some(file) = self.files.get(&id).cloned() else {
221 return;
222 };
223 let resolved = imports::resolve(
224 &file.imports,
225 relative_path,
226 &file.language,
227 &self.path_to_id,
228 );
229 self.deps
230 .set_edges(id, resolved.resolved, resolved.unresolved);
231 }
232
233 pub(super) fn rebuild_symbol_graph_for(&mut self, id: FileId) {
243 let Some(file) = self.files.get(&id).cloned() else {
244 return;
245 };
246 let abs = self.root.join(&file.relative_path);
247 let Ok(source) = std::fs::read_to_string(&abs) else {
248 return;
249 };
250 let Some(language) = AstLanguage::detect(std::path::Path::new(&file.relative_path), None)
251 else {
252 return;
253 };
254 let outcome =
255 self.symbols
256 .rebuild_file(id, &file.relative_path, language, &source, &file.imports);
257 if let Some(file_mut) = self.files.get_mut(&id) {
258 file_mut.symbols = outcome
259 .symbols
260 .iter()
261 .map(indexed_symbol_from_ast)
262 .collect();
263 }
264 }
265
266 pub(super) fn link_symbol_imports(&mut self) {
271 let mut resolved: HashMap<FileId, Vec<FileId>> = HashMap::new();
272 for id in self.files.keys() {
273 resolved.insert(*id, self.deps.imports_of(*id));
274 }
275 self.symbols.link_imports(&resolved);
276 }
277
278 pub fn lookup_path(&self, raw: &str) -> Option<FileId> {
281 if let Some(id) = self.path_to_id.get(raw) {
282 return Some(*id);
283 }
284 let path = Path::new(raw);
285 if path.is_absolute() {
286 if let Some(rel) = relative_path(&self.root, path) {
287 if let Some(id) = self.path_to_id.get(&rel) {
288 return Some(*id);
289 }
290 }
291 }
292 None
293 }
294
295 pub fn estimated_bytes(&self) -> usize {
298 let file_bytes: usize = self
299 .files
300 .values()
301 .map(|f| f.relative_path.len() + f.imports.iter().map(|s| s.len()).sum::<usize>() + 64)
302 .sum();
303 self.trigrams.estimated_bytes() + self.words.estimated_bytes() + file_bytes
304 }
305
306 pub fn absolute_path(&self, rel_or_abs: &str) -> Option<PathBuf> {
310 let p = Path::new(rel_or_abs);
311 let candidate = if p.is_absolute() {
312 p.to_path_buf()
313 } else {
314 self.root.join(p)
315 };
316 let canonical = canonicalize_existing(&candidate);
317 if canonical.strip_prefix(&self.root).is_ok() {
318 Some(canonical)
319 } else {
320 None
321 }
322 }
323
324 pub(crate) fn empty(root: PathBuf) -> Self {
327 Self {
328 root,
329 files: HashMap::new(),
330 path_to_id: HashMap::new(),
331 trigrams: TrigramIndex::new(),
332 words: WordIndex::new(),
333 deps: DepGraph::new(),
334 versions: VersionLog::new(),
335 agents: AgentRegistry::new(),
336 symbols: SymbolGraph::new(),
337 overlays: OverlayState::new(),
338 last_built_unix_ms: 0,
339 git_head: None,
340 next_id: 1,
341 }
342 }
343
    /// Returns the `FileId` that will be assigned to the next previously
    /// unseen path, without consuming it.
    pub(crate) fn next_file_id_internal(&self) -> FileId {
        self.next_id
    }
348
349 pub(crate) fn set_next_file_id(&mut self, id: FileId) {
351 self.next_id = id.max(1);
352 }
353}
354
355fn indexed_symbol_from_ast(sym: &AstSymbol) -> IndexedSymbol {
360 IndexedSymbol {
361 name: sym.name.clone(),
362 kind: sym.kind.as_str().to_string(),
363 start_line: sym.start_row.saturating_add(1),
364 end_line: sym.end_row.saturating_add(1),
365 signature: sym.signature.clone(),
366 }
367}
368
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Returns `0` if the system clock reports a time before the epoch.
pub(crate) fn now_unix_ms() -> i64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
377
/// Canonicalizes `root`, falling back to the path exactly as given when
/// canonicalization fails (for example because it does not exist).
fn canonicalize(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
381
382pub(crate) fn relative_path(root: &Path, abs: &Path) -> Option<String> {
388 let canonical_abs = canonicalize_existing(abs);
389 let stripped = canonical_abs.strip_prefix(root).ok()?;
390 Some(stripped.to_string_lossy().replace('\\', "/"))
391}
392
/// Canonicalizes `abs` even when its final components do not exist.
///
/// If the whole path canonicalizes, that result is returned. Otherwise the
/// longest existing ancestor is canonicalized and the missing tail is
/// re-applied on top of it. `..` components in the tail are resolved against
/// that canonical ancestor rather than kept verbatim, so a path such as
/// `<root>/missing/../../x` cannot pass a later lexical `strip_prefix(root)`
/// containment check. When a `..` was resolved, the result is canonicalized
/// once more because it may now name existing entries (including symlinks).
///
/// Falls back to returning `abs` unchanged when no ancestor exists or the
/// existing ancestor cannot be canonicalized.
fn canonicalize_existing(abs: &Path) -> PathBuf {
    if let Ok(canonical) = std::fs::canonicalize(abs) {
        return canonical;
    }

    // Longest prefix of `abs` that exists on disk (never the empty path).
    let Some(base) = abs
        .ancestors()
        .find(|candidate| !candidate.as_os_str().is_empty() && candidate.exists())
    else {
        return abs.to_path_buf();
    };
    let (Ok(mut out), Ok(tail)) = (std::fs::canonicalize(base), abs.strip_prefix(base)) else {
        return abs.to_path_buf();
    };

    let mut resolved_parent = false;
    for component in tail.components() {
        match component {
            std::path::Component::Normal(name) => out.push(name),
            std::path::Component::ParentDir => {
                // The tail does not exist, so it contains no symlinks and
                // `..` can be resolved lexically against the canonical base.
                out.pop();
                resolved_parent = true;
            }
            _ => {}
        }
    }

    if resolved_parent {
        // `out` no longer contains `..`, so this recursion runs at most once.
        canonicalize_existing(&out)
    } else {
        out
    }
}
422
/// Reads the current git HEAD of the workspace, best effort.
///
/// Returns `None` when `<workspace_root>/.git/HEAD` cannot be read. For a
/// detached HEAD the trimmed file content is returned. For a symbolic HEAD
/// (`ref: <name>`) the commit id is taken from the loose ref file if present,
/// otherwise from `.git/packed-refs`. If neither knows the ref (for example
/// an unborn branch), the trimmed `ref: …` line itself is returned.
fn read_git_head(workspace_root: &Path) -> Option<String> {
    let git_dir = workspace_root.join(".git");
    let head = std::fs::read_to_string(git_dir.join("HEAD")).ok()?;
    let head = head.trim();

    let Some(ref_target) = head.strip_prefix("ref: ") else {
        return Some(head.to_string());
    };

    if let Ok(sha) = std::fs::read_to_string(git_dir.join(ref_target)) {
        return Some(sha.trim().to_string());
    }

    // Refs can be stored only in packed-refs, one `<sha> <refname>` entry per
    // line. Header (`# ...`) and peeled (`^<sha>`) lines never match a ref
    // name, so they fall through without special handling.
    if let Ok(packed) = std::fs::read_to_string(git_dir.join("packed-refs")) {
        let packed_sha = packed.lines().find_map(|entry| {
            let (sha, name) = entry.split_once(' ')?;
            (name.trim() == ref_target).then(|| sha.to_string())
        });
        if packed_sha.is_some() {
            return packed_sha;
        }
    }

    Some(head.to_string())
}
435
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Writes `body` to `root/rel`, creating the parent directories first.
    fn write_file(root: &Path, rel: &str, body: &str) {
        let target = root.join(rel);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(target, body).unwrap();
    }

    #[test]
    fn build_indexes_files_and_resolves_imports() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_file(
            root,
            "src/main.rs",
            "use crate::util::helper;\nfn main() {}\n",
        );
        write_file(root, "src/util.rs", "pub fn helper() {}");

        let (state, outcome) = IndexState::build_from_root(root);
        assert_eq!(outcome.files_indexed, 2);
        assert_eq!(state.files.len(), 2);
        let main_id = state.path_to_id["src/main.rs"];
        // Indexing panics if `src/util.rs` was not registered.
        let _util_id = state.path_to_id["src/util.rs"];
        // The `use crate::…` import is expected to yield no file-level edge.
        assert_eq!(state.deps.imports_of(main_id), Vec::<FileId>::new());
    }

    #[test]
    fn typescript_imports_get_resolved() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_file(
            root,
            "src/index.ts",
            "import { helper } from \"./util\";\n",
        );
        write_file(root, "src/util.ts", "export function helper() {}");

        let (state, _) = IndexState::build_from_root(root);
        let index_id = state.path_to_id["src/index.ts"];
        let util_id = state.path_to_id["src/util.ts"];
        assert_eq!(state.deps.imports_of(index_id), vec![util_id]);
        assert_eq!(state.deps.importers_of(util_id), vec![index_id]);
    }

    #[test]
    fn lookup_path_handles_absolute_paths() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_file(root, "a/b/c.py", "x = 1\n");

        let (state, _) = IndexState::build_from_root(root);
        let absolute = root.join("a/b/c.py");
        let id = state.lookup_path(absolute.to_str().unwrap()).unwrap();
        assert_eq!(state.path_to_id["a/b/c.py"], id);
    }

    #[test]
    fn reindex_file_picks_up_changes_in_place() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_file(root, "src/a.ts", "export const x = 1;\n");

        let (mut state, _) = IndexState::build_from_root(root);
        let id = state.path_to_id["src/a.ts"];
        let before_hash = state.files[&id].content_hash;

        write_file(root, "src/a.ts", "export const x = 2;\n");
        let new_id = state.reindex_file(&root.join("src/a.ts")).unwrap();
        assert_eq!(new_id, id, "file id should be stable across reindex");
        let after_hash = state.files[&id].content_hash;
        assert_ne!(before_hash, after_hash);
    }

    #[test]
    fn reindex_file_removes_entry_when_path_disappears() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_file(root, "src/a.ts", "export const x = 1;\n");

        let (mut state, _) = IndexState::build_from_root(root);
        assert!(state.path_to_id.contains_key("src/a.ts"));

        fs::remove_file(root.join("src/a.ts")).unwrap();
        let result = state.reindex_file(&root.join("src/a.ts"));
        assert!(result.is_none());
        assert!(!state.path_to_id.contains_key("src/a.ts"));
    }
}
526}