1use std::collections::HashMap;
14use std::path::{Path, PathBuf};
15use std::time::{SystemTime, UNIX_EPOCH};
16
17use super::agents::AgentRegistry;
18use super::file_table::{fnv1a64, FileId, IndexedFile};
19use super::graph::DepGraph;
20use super::imports;
21use super::trigram::TrigramIndex;
22use super::versions::VersionLog;
23use super::walker::{is_indexable_file, language_for_extension, walk_indexable, MAX_FILE_BYTES};
24use super::words::WordIndex;
25
26pub struct IndexState {
30 pub root: PathBuf,
32 pub files: HashMap<FileId, IndexedFile>,
34 pub path_to_id: HashMap<String, FileId>,
36 pub trigrams: TrigramIndex,
38 pub words: WordIndex,
40 pub deps: DepGraph,
42 pub versions: VersionLog,
44 pub agents: AgentRegistry,
46 pub last_built_unix_ms: i64,
48 pub git_head: Option<String>,
50 next_id: FileId,
51}
52
/// Counters reported by [`IndexState::build_from_root`].
#[derive(Debug, Default)]
pub struct BuildOutcome {
    /// Number of walked files that ended up in the index.
    pub files_indexed: u64,
    /// Number of walked files that were rejected during ingestion.
    pub files_skipped: u64,
}
62
63impl IndexState {
64 pub fn build_from_root(root: &Path) -> (Self, BuildOutcome) {
67 let canonical_root = canonicalize(root);
68 let mut state = IndexState {
69 root: canonical_root.clone(),
70 files: HashMap::new(),
71 path_to_id: HashMap::new(),
72 trigrams: TrigramIndex::new(),
73 words: WordIndex::new(),
74 deps: DepGraph::new(),
75 versions: VersionLog::new(),
76 agents: AgentRegistry::new(),
77 last_built_unix_ms: now_unix_ms(),
78 git_head: read_git_head(&canonical_root),
79 next_id: 1,
80 };
81 let mut outcome = BuildOutcome::default();
82 let mut to_resolve: Vec<(FileId, String)> = Vec::new();
83 walk_indexable(&canonical_root, |abs| match state.ingest(abs) {
84 Some(file_id) => {
85 outcome.files_indexed += 1;
86 if let Some(file) = state.files.get(&file_id) {
87 to_resolve.push((file_id, file.relative_path.clone()));
88 }
89 }
90 None => {
91 outcome.files_skipped += 1;
92 }
93 });
94 for (id, rel) in to_resolve {
95 state.rebuild_deps(id, &rel);
96 }
97 (state, outcome)
98 }
99
100 pub fn reindex_file(&mut self, abs: &Path) -> Option<FileId> {
105 if !abs.exists() {
106 self.remove_file_path(abs);
107 return None;
108 }
109 if !is_indexable_file(abs) || super::walker::is_sensitive_path(abs) {
110 self.remove_file_path(abs);
111 return None;
112 }
113 let id = self.ingest(abs)?;
114 let rel = self
115 .files
116 .get(&id)
117 .map(|f| f.relative_path.clone())
118 .unwrap_or_default();
119 if !rel.is_empty() {
120 self.rebuild_deps(id, &rel);
121 }
122 Some(id)
123 }
124
125 pub fn remove_file_path(&mut self, abs: &Path) {
128 let Some(rel) = relative_path(&self.root, abs) else {
129 return;
130 };
131 let Some(id) = self.path_to_id.remove(&rel) else {
132 return;
133 };
134 self.files.remove(&id);
135 self.trigrams.remove_file(id);
136 self.words.remove_file(id);
137 self.deps.remove_file(id);
138 }
139
140 fn ingest(&mut self, abs: &Path) -> Option<FileId> {
141 if !is_indexable_file(abs) {
142 return None;
143 }
144 let metadata = std::fs::metadata(abs).ok()?;
145 if metadata.len() > MAX_FILE_BYTES {
146 return None;
147 }
148 let content = std::fs::read_to_string(abs).ok()?;
149 if content.len() > MAX_FILE_BYTES as usize {
150 return None;
151 }
152 let rel = relative_path(&self.root, abs)?;
153 let hash = fnv1a64(content.as_bytes());
154 let id = match self.path_to_id.get(&rel) {
155 Some(existing_id) => {
156 if let Some(file) = self.files.get(existing_id) {
157 if file.content_hash == hash {
158 return Some(*existing_id);
159 }
160 }
161 *existing_id
162 }
163 None => {
164 let id = self.next_id;
165 self.next_id = self.next_id.checked_add(1).expect("FileId overflow");
166 self.path_to_id.insert(rel.clone(), id);
167 id
168 }
169 };
170
171 let ext = abs
172 .extension()
173 .and_then(|s| s.to_str())
174 .unwrap_or("")
175 .to_ascii_lowercase();
176 let language = language_for_extension(&ext).to_string();
177 let imports = imports::extract_imports(&content, &language);
178 let mtime_ms = metadata
179 .modified()
180 .ok()
181 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
182 .map(|d| d.as_millis() as i64)
183 .unwrap_or(0);
184 let line_count = if content.is_empty() {
185 0
186 } else {
187 content.split('\n').count() as u32
188 };
189
190 let file = IndexedFile {
191 id,
192 relative_path: rel,
193 language,
194 size_bytes: content.len() as u64,
195 line_count,
196 content_hash: hash,
197 mtime_ms,
198 symbols: Vec::new(),
199 imports,
200 };
201 self.trigrams.index_file(id, &content);
202 self.words.index_file(id, &content);
203 self.files.insert(id, file);
204 Some(id)
205 }
206
207 fn rebuild_deps(&mut self, id: FileId, relative_path: &str) {
208 let Some(file) = self.files.get(&id).cloned() else {
209 return;
210 };
211 let resolved = imports::resolve(
212 &file.imports,
213 relative_path,
214 &file.language,
215 &self.path_to_id,
216 );
217 self.deps
218 .set_edges(id, resolved.resolved, resolved.unresolved);
219 }
220
221 pub fn lookup_path(&self, raw: &str) -> Option<FileId> {
224 if let Some(id) = self.path_to_id.get(raw) {
225 return Some(*id);
226 }
227 let path = Path::new(raw);
228 if path.is_absolute() {
229 if let Some(rel) = relative_path(&self.root, path) {
230 if let Some(id) = self.path_to_id.get(&rel) {
231 return Some(*id);
232 }
233 }
234 }
235 None
236 }
237
238 pub fn estimated_bytes(&self) -> usize {
241 let file_bytes: usize = self
242 .files
243 .values()
244 .map(|f| f.relative_path.len() + f.imports.iter().map(|s| s.len()).sum::<usize>() + 64)
245 .sum();
246 self.trigrams.estimated_bytes() + self.words.estimated_bytes() + file_bytes
247 }
248
249 pub fn absolute_path(&self, rel_or_abs: &str) -> Option<PathBuf> {
253 let p = Path::new(rel_or_abs);
254 let candidate = if p.is_absolute() {
255 p.to_path_buf()
256 } else {
257 self.root.join(p)
258 };
259 let canonical = canonicalize_existing(&candidate);
260 if canonical.strip_prefix(&self.root).is_ok() {
261 Some(canonical)
262 } else {
263 None
264 }
265 }
266
267 pub(crate) fn empty(root: PathBuf) -> Self {
270 Self {
271 root,
272 files: HashMap::new(),
273 path_to_id: HashMap::new(),
274 trigrams: TrigramIndex::new(),
275 words: WordIndex::new(),
276 deps: DepGraph::new(),
277 versions: VersionLog::new(),
278 agents: AgentRegistry::new(),
279 last_built_unix_ms: 0,
280 git_head: None,
281 next_id: 1,
282 }
283 }
284
285 pub(crate) fn next_file_id_internal(&self) -> FileId {
287 self.next_id
288 }
289
290 pub(crate) fn set_next_file_id(&mut self, id: FileId) {
292 self.next_id = id.max(1);
293 }
294}
295
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Returns `0` if the system clock reports a time before the epoch.
pub(crate) fn now_unix_ms() -> i64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
304
/// Canonicalizes `root`, returning it unchanged when canonicalization fails
/// (for example because the path does not exist).
fn canonicalize(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
308
309pub(crate) fn relative_path(root: &Path, abs: &Path) -> Option<String> {
315 let canonical_abs = canonicalize_existing(abs);
316 let stripped = canonical_abs.strip_prefix(root).ok()?;
317 Some(stripped.to_string_lossy().replace('\\', "/"))
318}
319
/// Canonicalizes `abs` even when the path (or its tail) does not exist.
///
/// If the whole path can be canonicalized, that result is returned. Otherwise
/// the deepest ancestor that exists is canonicalized and the remaining,
/// missing components are applied on top of it lexically: normal names are
/// appended, `.` is skipped and `..` removes the last component. Resolving
/// `..` lexically is sound here because the components it cancels do not
/// exist and therefore cannot be symlinks, and the base is already canonical.
///
/// This matters for containment checks: an input such as
/// `root/missing/../../outside` is reported as `<parent of root>/outside`
/// rather than being returned verbatim, where it would still appear to start
/// with `root`.
///
/// Falls back to returning `abs` unchanged when no ancestor exists (e.g. a
/// relative path whose first component is missing) or the existing ancestor
/// cannot be canonicalized.
fn canonicalize_existing(abs: &Path) -> PathBuf {
    if let Ok(resolved) = std::fs::canonicalize(abs) {
        return resolved;
    }
    for ancestor in abs.ancestors() {
        // An empty ancestor means a relative path was exhausted; give up.
        if ancestor.as_os_str().is_empty() {
            break;
        }
        if !ancestor.exists() {
            continue;
        }
        let Ok(mut out) = std::fs::canonicalize(ancestor) else {
            break;
        };
        let Ok(tail) = abs.strip_prefix(ancestor) else {
            break;
        };
        for component in tail.components() {
            match component {
                std::path::Component::ParentDir => {
                    // `pop` is a no-op at the filesystem root, matching how
                    // the OS treats `/..`.
                    out.pop();
                }
                std::path::Component::CurDir => {}
                other => out.push(other.as_os_str()),
            }
        }
        return out;
    }
    abs.to_path_buf()
}
349
/// Reads the current git head of the workspace, if it has a `.git/HEAD` file.
///
/// * Returns `None` when `.git/HEAD` cannot be read.
/// * For a symbolic head (`ref: <name>`), returns the trimmed content of the
///   loose ref file `.git/<name>` when it exists; otherwise the id recorded for
///   `<name>` in `.git/packed-refs` (refs are moved there by `git pack-refs` /
///   `git gc`); otherwise the trimmed `HEAD` line itself.
/// * For a detached head, returns the trimmed `HEAD` content.
fn read_git_head(workspace_root: &Path) -> Option<String> {
    let git_dir = workspace_root.join(".git");
    let head_text = std::fs::read_to_string(git_dir.join("HEAD")).ok()?;
    let head = head_text.trim();

    let Some(ref_target) = head.strip_prefix("ref: ") else {
        return Some(head.to_string());
    };

    // A loose ref takes precedence over a packed entry of the same name.
    if let Ok(sha) = std::fs::read_to_string(git_dir.join(ref_target)) {
        return Some(sha.trim().to_string());
    }

    // packed-refs lines look like "<id> <refname>"; the header ("# ...") and
    // peeled-tag lines ("^<id>") never match a ref name and are skipped.
    if let Ok(packed) = std::fs::read_to_string(git_dir.join("packed-refs")) {
        let packed_sha = packed.lines().find_map(|line| {
            let (sha, name) = line.split_once(' ')?;
            (name.trim() == ref_target).then(|| sha.to_string())
        });
        if packed_sha.is_some() {
            return packed_sha;
        }
    }

    Some(head.to_string())
}
362
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Writes `contents` to `root/rel`, creating parent directories as needed.
    fn write_fixture(root: &Path, rel: &str, contents: &str) {
        let target = root.join(rel);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(target, contents).unwrap();
    }

    #[test]
    fn build_indexes_files_and_resolves_imports() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_fixture(
            root,
            "src/main.rs",
            "use crate::util::helper;\nfn main() {}\n",
        );
        write_fixture(root, "src/util.rs", "pub fn helper() {}");

        let (state, outcome) = IndexState::build_from_root(root);

        assert_eq!(outcome.files_indexed, 2);
        assert_eq!(state.files.len(), 2);
        assert!(state.path_to_id.contains_key("src/util.rs"));
        let main_id = state.path_to_id["src/main.rs"];
        // NOTE(review): this pins that the `use crate::...` import yields no
        // resolved edge; presumably crate-path imports are not mapped to
        // files by the resolver. Confirm that is intended.
        assert_eq!(state.deps.imports_of(main_id), Vec::<FileId>::new());
    }

    #[test]
    fn typescript_imports_get_resolved() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_fixture(
            root,
            "src/index.ts",
            "import { helper } from \"./util\";\n",
        );
        write_fixture(root, "src/util.ts", "export function helper() {}");

        let (state, _) = IndexState::build_from_root(root);

        let importer = state.path_to_id["src/index.ts"];
        let imported = state.path_to_id["src/util.ts"];
        assert_eq!(state.deps.imports_of(importer), vec![imported]);
        assert_eq!(state.deps.importers_of(imported), vec![importer]);
    }

    #[test]
    fn lookup_path_handles_absolute_paths() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_fixture(root, "a/b/c.py", "x = 1\n");

        let (state, _) = IndexState::build_from_root(root);

        let absolute = root.join("a/b/c.py");
        let found = state.lookup_path(absolute.to_str().unwrap()).unwrap();
        assert_eq!(state.path_to_id["a/b/c.py"], found);
    }

    #[test]
    fn reindex_file_picks_up_changes_in_place() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_fixture(root, "src/a.ts", "export const x = 1;\n");

        let (mut state, _) = IndexState::build_from_root(root);
        let original_id = state.path_to_id["src/a.ts"];
        let hash_before = state.files[&original_id].content_hash;

        fs::write(root.join("src/a.ts"), "export const x = 2;\n").unwrap();
        let reindexed_id = state.reindex_file(&root.join("src/a.ts")).unwrap();

        assert_eq!(
            reindexed_id, original_id,
            "file id should be stable across reindex"
        );
        let hash_after = state.files[&original_id].content_hash;
        assert_ne!(hash_before, hash_after);
    }

    #[test]
    fn reindex_file_removes_entry_when_path_disappears() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_fixture(root, "src/a.ts", "export const x = 1;\n");

        let (mut state, _) = IndexState::build_from_root(root);
        assert!(state.path_to_id.contains_key("src/a.ts"));

        fs::remove_file(root.join("src/a.ts")).unwrap();
        let outcome = state.reindex_file(&root.join("src/a.ts"));

        assert!(outcome.is_none());
        assert!(!state.path_to_id.contains_key("src/a.ts"));
    }
}