use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
use super::agents::AgentRegistry;
use super::file_table::{fnv1a64, FileId, IndexedFile};
use super::graph::DepGraph;
use super::imports;
use super::trigram::TrigramIndex;
use super::versions::VersionLog;
use super::walker::{is_indexable_file, language_for_extension, walk_indexable, MAX_FILE_BYTES};
use super::words::WordIndex;
/// In-memory index of one workspace: the file table, the search indexes and
/// the import dependency graph, plus bookkeeping about when it was built.
pub struct IndexState {
/// Workspace root. `build_from_root` stores the canonicalized root here.
pub root: PathBuf,
/// Indexed files keyed by their id.
pub files: HashMap<FileId, IndexedFile>,
/// Maps a root-relative path (with `/` separators) to the id of the file
/// indexed at that path.
pub path_to_id: HashMap<String, FileId>,
/// Trigram index; fed with each file's content when the file is ingested.
pub trigrams: TrigramIndex,
/// Word index; fed with each file's content when the file is ingested.
pub words: WordIndex,
/// Import edges between files, set from each file's extracted imports.
pub deps: DepGraph,
/// Version log, created with `VersionLog::new()` by the constructors.
pub versions: VersionLog,
/// Agent registry, created with `AgentRegistry::new()` by the constructors.
pub agents: AgentRegistry,
/// Unix time in milliseconds at which `build_from_root` ran; `0` for a
/// state made by `empty`.
pub last_built_unix_ms: i64,
/// What `.git/HEAD` resolved to when the index was built, if it could be read.
pub git_head: Option<String>,
// Id handed to the next newly seen path; the constructors start it at 1.
next_id: FileId,
}
/// Counters reported by `IndexState::build_from_root`.
#[derive(Debug, Default)]
pub struct BuildOutcome {
/// Number of walked paths for which ingesting produced a file id.
pub files_indexed: u64,
/// Number of walked paths that ingesting declined (it returned `None`).
pub files_skipped: u64,
}
impl IndexState {
pub fn build_from_root(root: &Path) -> (Self, BuildOutcome) {
let canonical_root = canonicalize(root);
let mut state = IndexState {
root: canonical_root.clone(),
files: HashMap::new(),
path_to_id: HashMap::new(),
trigrams: TrigramIndex::new(),
words: WordIndex::new(),
deps: DepGraph::new(),
versions: VersionLog::new(),
agents: AgentRegistry::new(),
last_built_unix_ms: now_unix_ms(),
git_head: read_git_head(&canonical_root),
next_id: 1,
};
let mut outcome = BuildOutcome::default();
let mut to_resolve: Vec<(FileId, String)> = Vec::new();
walk_indexable(&canonical_root, |abs| match state.ingest(abs) {
Some(file_id) => {
outcome.files_indexed += 1;
if let Some(file) = state.files.get(&file_id) {
to_resolve.push((file_id, file.relative_path.clone()));
}
}
None => {
outcome.files_skipped += 1;
}
});
for (id, rel) in to_resolve {
state.rebuild_deps(id, &rel);
}
(state, outcome)
}
/// Re-indexes a single file after it changed on disk.
///
/// Returns the file's id when the file is indexed afterwards. The entry is
/// removed from the index and `None` is returned when the path
///
/// * no longer exists,
/// * is not an indexable file or is a sensitive path, or
/// * can no longer be ingested (for example it grew past the size limit,
///   is no longer valid UTF-8, or cannot be read).
///
/// Removing on ingest failure keeps the index from serving content that a
/// fresh `build_from_root` would have skipped.
pub fn reindex_file(&mut self, abs: &Path) -> Option<FileId> {
    let gone_or_excluded =
        !abs.exists() || !is_indexable_file(abs) || super::walker::is_sensitive_path(abs);
    if gone_or_excluded {
        self.remove_file_path(abs);
        return None;
    }

    let Some(id) = self.ingest(abs) else {
        // A failed ingest must not leave the previous version of the file
        // (and its trigram/word postings) behind as a stale entry.
        self.remove_file_path(abs);
        return None;
    };

    // Edges are only rebuilt for a file that has a non-empty relative path.
    let rel = self
        .files
        .get(&id)
        .map(|f| f.relative_path.clone())
        .filter(|rel| !rel.is_empty());
    if let Some(rel) = rel {
        self.rebuild_deps(id, &rel);
    }
    Some(id)
}
/// Drops the file at `abs` from the file table, both search indexes and the
/// dependency graph.
///
/// Does nothing when the path cannot be expressed relative to the root or
/// is not currently indexed.
pub fn remove_file_path(&mut self, abs: &Path) {
    let removed = relative_path(&self.root, abs).and_then(|rel| self.path_to_id.remove(&rel));
    if let Some(id) = removed {
        self.files.remove(&id);
        self.trigrams.remove_file(id);
        self.words.remove_file(id);
        self.deps.remove_file(id);
    }
}
/// Reads one file from disk and (re)indexes its content.
///
/// Returns `None` when the file is skipped: it is not indexable, its
/// metadata or content cannot be read as UTF-8 text, it exceeds
/// `MAX_FILE_BYTES`, or it does not lie under the root.
///
/// A path that is already indexed keeps its id. If its content hash is
/// unchanged the existing id is returned without touching any index.
/// A new path is assigned the next free id.
///
/// # Panics
///
/// Panics with "FileId overflow" if the id counter cannot be incremented.
fn ingest(&mut self, abs: &Path) -> Option<FileId> {
    if !is_indexable_file(abs) {
        return None;
    }
    // Check the size from metadata first so oversized files are never read.
    let meta = std::fs::metadata(abs).ok()?;
    if meta.len() > MAX_FILE_BYTES {
        return None;
    }
    let text = std::fs::read_to_string(abs).ok()?;
    if text.len() > MAX_FILE_BYTES as usize {
        return None;
    }
    let rel = relative_path(&self.root, abs)?;
    let content_hash = fnv1a64(text.as_bytes());

    let id = if let Some(known) = self.path_to_id.get(&rel).copied() {
        let unchanged =
            matches!(self.files.get(&known), Some(prev) if prev.content_hash == content_hash);
        if unchanged {
            return Some(known);
        }
        known
    } else {
        let fresh = self.next_id;
        self.next_id = fresh.checked_add(1).expect("FileId overflow");
        self.path_to_id.insert(rel.clone(), fresh);
        fresh
    };

    let ext = abs
        .extension()
        .and_then(|e| e.to_str())
        .map(|e| e.to_ascii_lowercase())
        .unwrap_or_default();
    let language = language_for_extension(&ext).to_string();
    let found_imports = imports::extract_imports(&text, &language);

    // Modification time in Unix milliseconds; 0 when it is unavailable.
    let mtime_ms = meta
        .modified()
        .ok()
        .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
        .map_or(0, |d| d.as_millis() as i64);

    // Number of `\n`-separated segments; an empty file has zero lines.
    let line_count = match text.as_str() {
        "" => 0,
        body => body.split('\n').count() as u32,
    };

    let record = IndexedFile {
        id,
        relative_path: rel,
        language,
        size_bytes: text.len() as u64,
        line_count,
        content_hash,
        mtime_ms,
        symbols: Vec::new(),
        imports: found_imports,
    };

    // NOTE(review): for a re-used id this assumes `index_file` replaces any
    // postings from the previous content - confirm in the index modules.
    self.trigrams.index_file(id, &text);
    self.words.index_file(id, &text);
    self.files.insert(id, record);
    Some(id)
}
/// Re-resolves the imports of file `id` and replaces its edges in the
/// dependency graph.
///
/// `relative_path` is the importing file's root-relative path and is handed
/// to the resolver together with the file's imports and language. Does
/// nothing when `id` is not in the file table.
fn rebuild_deps(&mut self, id: FileId, relative_path: &str) {
    // Borrow the entry rather than cloning it: `files`, `path_to_id` and
    // `deps` are separate fields, so reading the first two while mutating
    // the third needs no copy of the (potentially large) record.
    let Some(file) = self.files.get(&id) else {
        return;
    };
    let resolved = imports::resolve(
        &file.imports,
        relative_path,
        &file.language,
        &self.path_to_id,
    );
    self.deps
        .set_edges(id, resolved.resolved, resolved.unresolved);
}
/// Finds the id of an indexed file from a path string.
///
/// `raw` is first tried verbatim as a key of `path_to_id`. If that misses
/// and `raw` is an absolute path, it is made relative to the root and
/// looked up again. Returns `None` when neither lookup succeeds.
pub fn lookup_path(&self, raw: &str) -> Option<FileId> {
    self.path_to_id.get(raw).copied().or_else(|| {
        let candidate = Path::new(raw);
        if !candidate.is_absolute() {
            return None;
        }
        let rel = relative_path(&self.root, candidate)?;
        self.path_to_id.get(&rel).copied()
    })
}
/// Rough size of the index in bytes.
///
/// Sums the estimates reported by the trigram and word indexes and, per
/// file, the length of its relative path and import strings plus a fixed
/// 64-byte allowance.
pub fn estimated_bytes(&self) -> usize {
    let mut file_bytes = 0usize;
    for file in self.files.values() {
        let import_bytes: usize = file.imports.iter().map(|s| s.len()).sum();
        file_bytes += file.relative_path.len() + import_bytes + 64;
    }
    self.trigrams.estimated_bytes() + self.words.estimated_bytes() + file_bytes
}
/// Resolves `rel_or_abs` to an absolute path, but only if it stays inside
/// the root.
///
/// Relative input is joined onto the root. The target does not have to
/// exist: the part of the path that exists is canonicalized and the
/// remaining components are appended.
///
/// Returns `None` when the resolved path lies outside the root or still
/// contains a `..` component.
pub fn absolute_path(&self, rel_or_abs: &str) -> Option<PathBuf> {
    let requested = Path::new(rel_or_abs);
    let candidate = if requested.is_absolute() {
        requested.to_path_buf()
    } else {
        self.root.join(requested)
    };
    let resolved = canonicalize_existing(&candidate);

    // A canonicalized path never contains `..`. If one survives, a component
    // in front of it does not exist and the path came back unresolved.
    // Prefix checks compare components without normalizing, so something
    // like `<root>/missing/../../outside` would otherwise be accepted as
    // being inside the root.
    let has_parent_ref = resolved
        .components()
        .any(|part| matches!(part, std::path::Component::ParentDir));
    if has_parent_ref || !resolved.starts_with(&self.root) {
        return None;
    }
    Some(resolved)
}
pub(crate) fn empty(root: PathBuf) -> Self {
Self {
root,
files: HashMap::new(),
path_to_id: HashMap::new(),
trigrams: TrigramIndex::new(),
words: WordIndex::new(),
deps: DepGraph::new(),
versions: VersionLog::new(),
agents: AgentRegistry::new(),
last_built_unix_ms: 0,
git_head: None,
next_id: 1,
}
}
/// Returns the id that will be assigned to the next newly indexed path.
pub(crate) fn next_file_id_internal(&self) -> FileId {
self.next_id
}
/// Sets the id that will be assigned to the next newly indexed path.
///
/// Values below 1 are raised to 1.
pub(crate) fn set_next_file_id(&mut self, id: FileId) {
    let clamped = std::cmp::max(id, 1);
    self.next_id = clamped;
}
}
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Returns `0` if the system clock reports a time before the epoch.
pub(crate) fn now_unix_ms() -> i64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
/// Canonicalizes `root`, falling back to the path as given when that fails
/// (for example because it does not exist).
fn canonicalize(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
/// Expresses `abs` relative to `root` as a `/`-separated string.
///
/// `abs` is canonicalized as far as it exists before the prefix is removed.
/// Returns `None` when the result does not start with `root`. Backslashes in
/// the relative part are replaced with `/`.
pub(crate) fn relative_path(root: &Path, abs: &Path) -> Option<String> {
    let resolved = canonicalize_existing(abs);
    match resolved.strip_prefix(root) {
        Ok(inside) => Some(inside.to_string_lossy().replace('\\', "/")),
        Err(_) => None,
    }
}
/// Canonicalizes `abs` even when its trailing components do not exist.
///
/// If the whole path canonicalizes, that result is returned. Otherwise the
/// path is walked upwards to the nearest ancestor that exists; that ancestor
/// is canonicalized and the missing trailing components are appended to it
/// unchanged. When no such ancestor can be found or canonicalized, `abs` is
/// returned as given.
fn canonicalize_existing(abs: &Path) -> PathBuf {
    if let Ok(resolved) = std::fs::canonicalize(abs) {
        return resolved;
    }

    // Names of the components that do not exist yet, innermost first.
    let mut missing: Vec<&std::ffi::OsStr> = Vec::new();
    let mut ancestor = abs;
    while !ancestor.exists() {
        let (Some(parent), Some(name)) = (ancestor.parent(), ancestor.file_name()) else {
            return abs.to_path_buf();
        };
        if parent.as_os_str().is_empty() {
            return abs.to_path_buf();
        }
        missing.push(name);
        ancestor = parent;
    }

    match std::fs::canonicalize(ancestor) {
        Ok(mut resolved) => {
            for name in missing.into_iter().rev() {
                resolved.push(name);
            }
            resolved
        }
        Err(_) => abs.to_path_buf(),
    }
}
/// Reads what `.git/HEAD` under `workspace_root` currently points at.
///
/// * Returns `None` when `.git/HEAD` cannot be read.
/// * For a detached head, returns the trimmed content of `HEAD`.
/// * For a symbolic ref (`ref: <name>`), returns the trimmed content of the
///   loose ref file `.git/<name>`; if that file is missing, the id recorded
///   for `<name>` in `.git/packed-refs`; and if neither is available, the
///   trimmed `HEAD` line itself.
fn read_git_head(workspace_root: &Path) -> Option<String> {
    let git_dir = workspace_root.join(".git");
    let head_text = std::fs::read_to_string(git_dir.join("HEAD")).ok()?;
    let head = head_text.trim();

    let Some(ref_name) = head.strip_prefix("ref: ") else {
        return Some(head.to_string());
    };

    if let Ok(sha) = std::fs::read_to_string(git_dir.join(ref_name)) {
        return Some(sha.trim().to_string());
    }

    // Git may store a ref only in `packed-refs` (one `<id> <name>` pair per
    // line; `#` lines are headers, `^` lines are peeled tags). Without this
    // lookup a packed branch would be reported as the literal `ref: ...` line.
    if let Ok(packed) = std::fs::read_to_string(git_dir.join("packed-refs")) {
        let hit = packed
            .lines()
            .filter(|line| !line.starts_with('#') && !line.starts_with('^'))
            .filter_map(|line| line.split_once(' '))
            .find(|(_, name)| name.trim() == ref_name);
        if let Some((sha, _)) = hit {
            return Some(sha.trim().to_string());
        }
    }

    Some(head.to_string())
}
// Tests for `IndexState`; each one builds a small workspace in a temporary
// directory and indexes it.
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::tempdir;
// Two Rust files are indexed and both get ids.
// NOTE(review): despite the test name, the assertion expects NO resolved
// import edge from `main.rs` for `use crate::util::helper`, and `util_id`
// is unused - confirm whether that is the intended resolver behavior.
#[test]
fn build_indexes_files_and_resolves_imports() {
let dir = tempdir().unwrap();
let root = dir.path();
fs::create_dir_all(root.join("src")).unwrap();
fs::write(
root.join("src/main.rs"),
"use crate::util::helper;\nfn main() {}\n",
)
.unwrap();
fs::write(root.join("src/util.rs"), "pub fn helper() {}").unwrap();
let (state, outcome) = IndexState::build_from_root(root);
assert_eq!(outcome.files_indexed, 2);
assert_eq!(state.files.len(), 2);
let main_id = state.path_to_id["src/main.rs"];
let util_id = state.path_to_id["src/util.rs"];
assert_eq!(state.deps.imports_of(main_id), Vec::<FileId>::new());
let _ = util_id;
}
// A relative TypeScript import yields an edge in both directions of the graph.
#[test]
fn typescript_imports_get_resolved() {
let dir = tempdir().unwrap();
let root = dir.path();
fs::create_dir_all(root.join("src")).unwrap();
fs::write(
root.join("src/index.ts"),
"import { helper } from \"./util\";\n",
)
.unwrap();
fs::write(root.join("src/util.ts"), "export function helper() {}").unwrap();
let (state, _) = IndexState::build_from_root(root);
let index_id = state.path_to_id["src/index.ts"];
let util_id = state.path_to_id["src/util.ts"];
assert_eq!(state.deps.imports_of(index_id), vec![util_id]);
assert_eq!(state.deps.importers_of(util_id), vec![index_id]);
}
// An absolute path is mapped back to the same id as its relative form.
#[test]
fn lookup_path_handles_absolute_paths() {
let dir = tempdir().unwrap();
let root = dir.path();
fs::create_dir_all(root.join("a/b")).unwrap();
fs::write(root.join("a/b/c.py"), "x = 1\n").unwrap();
let (state, _) = IndexState::build_from_root(root);
let abs = root.join("a/b/c.py");
let id = state.lookup_path(abs.to_str().unwrap()).unwrap();
assert_eq!(state.path_to_id["a/b/c.py"], id);
}
// Rewriting a file keeps its id but changes the stored content hash.
#[test]
fn reindex_file_picks_up_changes_in_place() {
let dir = tempdir().unwrap();
let root = dir.path();
fs::create_dir_all(root.join("src")).unwrap();
fs::write(root.join("src/a.ts"), "export const x = 1;\n").unwrap();
let (mut state, _) = IndexState::build_from_root(root);
let id = state.path_to_id["src/a.ts"];
let before_hash = state.files[&id].content_hash;
fs::write(root.join("src/a.ts"), "export const x = 2;\n").unwrap();
let new_id = state.reindex_file(&root.join("src/a.ts")).unwrap();
assert_eq!(new_id, id, "file id should be stable across reindex");
let after_hash = state.files[&id].content_hash;
assert_ne!(before_hash, after_hash);
}
// Reindexing a deleted file returns `None` and drops its path mapping.
#[test]
fn reindex_file_removes_entry_when_path_disappears() {
let dir = tempdir().unwrap();
let root = dir.path();
fs::create_dir_all(root.join("src")).unwrap();
fs::write(root.join("src/a.ts"), "export const x = 1;\n").unwrap();
let (mut state, _) = IndexState::build_from_root(root);
assert!(state.path_to_id.contains_key("src/a.ts"));
fs::remove_file(root.join("src/a.ts")).unwrap();
let result = state.reindex_file(&root.join("src/a.ts"));
assert!(result.is_none());
assert!(!state.path_to_id.contains_key("src/a.ts"));
}
}