use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
use super::file_table::{fnv1a64, FileId, IndexedFile};
use super::graph::DepGraph;
use super::imports;
use super::trigram::TrigramIndex;
use super::walker::{is_indexable_file, language_for_extension, walk_indexable, MAX_FILE_BYTES};
use super::words::WordIndex;
/// In-memory index of one workspace: per-file records plus the trigram, word and
/// dependency structures derived from them.
///
/// Instances are produced by [`IndexState::build_from_root`].
pub struct IndexState {
/// Workspace root. `build_from_root` stores the canonicalized form of the path it
/// was given, or the path unchanged when canonicalization fails.
pub root: PathBuf,
/// Every indexed file, keyed by its id.
pub files: HashMap<FileId, IndexedFile>,
/// Root-relative path (using `/` as separator) to file id.
pub path_to_id: HashMap<String, FileId>,
/// Trigram index; fed the content of each ingested file.
pub trigrams: TrigramIndex,
/// Word index; fed the content of each ingested file.
pub words: WordIndex,
/// Import edges between files, filled in after all files have been ingested.
pub deps: DepGraph,
/// Milliseconds since the Unix epoch, sampled when the state was created by
/// `build_from_root` (0 if the system clock reads earlier than the epoch).
pub last_built_unix_ms: i64,
/// Value read from `<root>/.git/HEAD` at build time; `None` when that file
/// could not be read.
pub git_head: Option<String>,
// Id handed to the next previously unseen path; `build_from_root` starts it at 1.
next_id: FileId,
}
/// Counters reported by [`IndexState::build_from_root`].
#[derive(Debug, Default)]
pub struct BuildOutcome {
/// Number of walked paths for which ingestion produced a file id.
pub files_indexed: u64,
/// Number of walked paths that ingestion rejected (not indexable, too large,
/// unreadable, or not located under the root).
pub files_skipped: u64,
}
impl IndexState {
/// Builds a fresh index for the workspace at `root`.
///
/// The root is canonicalized first (falling back to `root` as given). Every path
/// reported by `walk_indexable` is ingested; import edges are resolved only after
/// the walk has finished, so resolution sees the complete `path_to_id` table.
///
/// Returns the populated state together with indexed/skipped counters.
pub fn build_from_root(root: &Path) -> (Self, BuildOutcome) {
    let canonical_root = canonicalize(root);
    // Timestamp is sampled before HEAD is read, i.e. before any file is touched.
    let built_at = now_unix_ms();
    let head = read_git_head(&canonical_root);

    let mut state = IndexState {
        root: canonical_root.clone(),
        files: HashMap::new(),
        path_to_id: HashMap::new(),
        trigrams: TrigramIndex::new(),
        words: WordIndex::new(),
        deps: DepGraph::new(),
        last_built_unix_ms: built_at,
        git_head: head,
        next_id: 1,
    };
    let mut outcome = BuildOutcome::default();
    // (file id, root-relative path) pairs whose imports still need resolving.
    let mut pending: Vec<(FileId, String)> = Vec::new();

    walk_indexable(&canonical_root, |abs| {
        let Some(file_id) = state.ingest(abs) else {
            outcome.files_skipped += 1;
            return;
        };
        outcome.files_indexed += 1;
        if let Some(entry) = state.files.get(&file_id) {
            pending.push((file_id, entry.relative_path.clone()));
        }
    });

    // Second pass: every path now has an id, so imports can be matched to files.
    for (file_id, rel) in &pending {
        state.rebuild_deps(*file_id, rel);
    }
    (state, outcome)
}
/// Reads the file at `abs` and (re)indexes it, returning its id.
///
/// Returns `None` when the file is rejected by `is_indexable_file`, its metadata
/// or contents cannot be read (`read_to_string` also fails on non-UTF-8 data),
/// it exceeds `MAX_FILE_BYTES`, or it is not located under `self.root`.
///
/// A path that is already known and whose content hash is unchanged is returned
/// immediately without touching any index.
fn ingest(&mut self, abs: &Path) -> Option<FileId> {
    if !is_indexable_file(abs) {
        return None;
    }
    let metadata = std::fs::metadata(abs).ok()?;
    if metadata.len() > MAX_FILE_BYTES {
        return None;
    }
    let content = std::fs::read_to_string(abs).ok()?;
    // Checked again on the bytes actually read, independent of the metadata size.
    if content.len() > MAX_FILE_BYTES as usize {
        return None;
    }
    let rel = relative_path(&self.root, abs)?;
    let hash = fnv1a64(content.as_bytes());

    let id = if let Some(known) = self.path_to_id.get(&rel).copied() {
        let unchanged = matches!(self.files.get(&known), Some(prev) if prev.content_hash == hash);
        if unchanged {
            return Some(known);
        }
        known
    } else {
        let fresh = self.next_id;
        self.next_id = fresh.checked_add(1).expect("FileId overflow");
        self.path_to_id.insert(rel.clone(), fresh);
        fresh
    };

    let ext = match abs.extension().and_then(|s| s.to_str()) {
        Some(raw) => raw.to_ascii_lowercase(),
        None => String::new(),
    };
    let language = language_for_extension(&ext).to_string();
    let imports = imports::extract_imports(&content, &language);

    // Modification time in ms since the epoch; 0 when unavailable or pre-epoch.
    let mtime_ms = match metadata.modified() {
        Ok(stamp) => stamp
            .duration_since(UNIX_EPOCH)
            .map(|d| d.as_millis() as i64)
            .unwrap_or(0),
        Err(_) => 0,
    };

    // Number of '\n'-separated segments: a trailing newline therefore counts one
    // extra (empty) line, and an empty file counts zero.
    let line_count = if content.is_empty() {
        0
    } else {
        (content.bytes().filter(|&b| b == b'\n').count() + 1) as u32
    };

    let size_bytes = content.len() as u64;
    let record = IndexedFile {
        id,
        relative_path: rel,
        language,
        size_bytes,
        line_count,
        content_hash: hash,
        mtime_ms,
        symbols: Vec::new(),
        imports,
    };

    // NOTE(review): for a changed file this calls `index_file` again with the same
    // id; whether earlier postings are replaced is up to the index types — confirm.
    self.trigrams.index_file(id, &content);
    self.words.index_file(id, &content);
    self.files.insert(id, record);
    Some(id)
}
/// Recomputes the outgoing import edges of file `id`.
///
/// The file's recorded imports are resolved against the current `path_to_id`
/// table, using `relative_path` as the importing file's location, and the
/// resolved/unresolved results replace the file's edges in `self.deps`.
/// Does nothing when `id` is not present in `self.files`.
fn rebuild_deps(&mut self, id: FileId, relative_path: &str) {
    // `files`, `path_to_id` and `deps` are distinct fields, so the record can be
    // borrowed in place while `deps` is mutated; no copy of the record is needed.
    let Some(file) = self.files.get(&id) else {
        return;
    };
    let resolved = imports::resolve(
        &file.imports,
        relative_path,
        &file.language,
        &self.path_to_id,
    );
    self.deps
        .set_edges(id, resolved.resolved, resolved.unresolved);
}
/// Finds the id of an indexed file from a user-supplied path string.
///
/// `raw` is first tried verbatim as a root-relative key. If that misses and
/// `raw` is an absolute path, it is converted to a root-relative key and looked
/// up again. Returns `None` when neither attempt matches.
pub fn lookup_path(&self, raw: &str) -> Option<FileId> {
    if let Some(direct) = self.path_to_id.get(raw).copied() {
        return Some(direct);
    }
    let candidate = Path::new(raw);
    if !candidate.is_absolute() {
        return None;
    }
    let rel = relative_path(&self.root, candidate)?;
    self.path_to_id.get(&rel).copied()
}
/// Rough estimate of the index's memory use, in bytes.
///
/// Sums the trigram and word indexes' own estimates and, for each file record,
/// the length of its relative path and import strings plus a flat 64 bytes
/// (presumably an allowance for the record's fixed-size fields).
pub fn estimated_bytes(&self) -> usize {
    let mut file_bytes: usize = 0;
    for record in self.files.values() {
        let import_bytes: usize = record.imports.iter().map(|s| s.len()).sum();
        file_bytes += record.relative_path.len() + import_bytes + 64;
    }
    self.trigrams.estimated_bytes() + self.words.estimated_bytes() + file_bytes
}
}
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields 0 if the system clock reports a time before the epoch.
fn now_unix_ms() -> i64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}
/// Canonicalizes `root` on a best-effort basis: if the filesystem cannot resolve
/// it (for example because it does not exist), the path is returned unchanged.
fn canonicalize(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
/// Expresses `abs` relative to `root`, using `/` as the separator.
///
/// `abs` is canonicalized when possible (otherwise used as given); `root` is used
/// exactly as passed, so callers should supply an already-canonical root.
/// Returns `None` when the resulting path does not start with `root`.
/// Non-UTF-8 components are converted lossily and every `\` becomes `/`.
pub(crate) fn relative_path(root: &Path, abs: &Path) -> Option<String> {
    let resolved = match std::fs::canonicalize(abs) {
        Ok(real) => real,
        Err(_) => abs.to_path_buf(),
    };
    resolved
        .strip_prefix(root)
        .ok()
        .map(|tail| tail.to_string_lossy().replace('\\', "/"))
}
/// Reads what `HEAD` currently points at by inspecting `<workspace_root>/.git`
/// directly (no `git` process is spawned).
///
/// * Returns `None` when `.git/HEAD` cannot be read.
/// * For a detached HEAD, returns the trimmed contents of `HEAD`.
/// * For a symbolic HEAD (`ref: <name>`), returns the trimmed contents of the
///   loose ref file `.git/<name>` if it exists; otherwise the object id recorded
///   for `<name>` in `.git/packed-refs`, where refs live after `git gc` /
///   `git pack-refs`.
/// * If the ref is found in neither place (e.g. a branch with no commits yet),
///   returns the `ref: <name>` line itself.
fn read_git_head(workspace_root: &Path) -> Option<String> {
    let git_dir = workspace_root.join(".git");
    let txt = std::fs::read_to_string(git_dir.join("HEAD")).ok()?;
    let line = txt.trim();

    if let Some(ref_target) = line.strip_prefix("ref: ") {
        // A loose ref takes precedence over a packed entry of the same name.
        if let Ok(sha) = std::fs::read_to_string(git_dir.join(ref_target)) {
            return Some(sha.trim().to_string());
        }
        if let Ok(packed) = std::fs::read_to_string(git_dir.join("packed-refs")) {
            // Entries are "<object-id> <refname>". The "# ..." header never matches
            // a ref name, and peeled "^<object-id>" lines contain no space.
            let packed_sha = packed.lines().find_map(|entry| {
                let (sha, name) = entry.split_once(' ')?;
                (name.trim_end() == ref_target).then(|| sha.to_string())
            });
            if packed_sha.is_some() {
                return packed_sha;
            }
        }
    }
    Some(line.to_string())
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Writes each `(relative path, contents)` pair under `root`, creating any
    /// missing parent directories.
    fn populate(root: &Path, files: &[(&str, &str)]) {
        for (rel, body) in files {
            let target = root.join(rel);
            fs::create_dir_all(target.parent().unwrap()).unwrap();
            fs::write(&target, body).unwrap();
        }
    }

    // Both Rust files are indexed. The assertion expects *no* resolved edge for
    // `main.rs` despite its `use crate::util::helper;` line.
    #[test]
    fn build_indexes_files_and_resolves_imports() {
        let workspace = tempdir().unwrap();
        populate(
            workspace.path(),
            &[
                ("src/main.rs", "use crate::util::helper;\nfn main() {}\n"),
                ("src/util.rs", "pub fn helper() {}"),
            ],
        );

        let (state, outcome) = IndexState::build_from_root(workspace.path());

        assert_eq!(outcome.files_indexed, 2);
        assert_eq!(state.files.len(), 2);
        let main_id = state.path_to_id["src/main.rs"];
        // Indexing panics if the path is missing, so this still checks presence.
        let _util_id = state.path_to_id["src/util.rs"];
        assert_eq!(state.deps.imports_of(main_id), Vec::<FileId>::new());
    }

    // A relative TypeScript import produces an edge in both directions.
    #[test]
    fn typescript_imports_get_resolved() {
        let workspace = tempdir().unwrap();
        populate(
            workspace.path(),
            &[
                ("src/index.ts", "import { helper } from \"./util\";\n"),
                ("src/util.ts", "export function helper() {}"),
            ],
        );

        let (state, _) = IndexState::build_from_root(workspace.path());

        let index_id = state.path_to_id["src/index.ts"];
        let util_id = state.path_to_id["src/util.ts"];
        assert_eq!(state.deps.imports_of(index_id), vec![util_id]);
        assert_eq!(state.deps.importers_of(util_id), vec![index_id]);
    }

    // An absolute path resolves to the same id as its root-relative key.
    #[test]
    fn lookup_path_handles_absolute_paths() {
        let workspace = tempdir().unwrap();
        populate(workspace.path(), &[("a/b/c.py", "x = 1\n")]);

        let (state, _) = IndexState::build_from_root(workspace.path());

        let absolute = workspace.path().join("a/b/c.py");
        let found = state.lookup_path(absolute.to_str().unwrap()).unwrap();
        assert_eq!(state.path_to_id["a/b/c.py"], found);
    }
}