harn_hostlib/code_index/
state.rs1use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10use std::time::{SystemTime, UNIX_EPOCH};
11
12use super::file_table::{fnv1a64, FileId, IndexedFile};
13use super::graph::DepGraph;
14use super::imports;
15use super::trigram::TrigramIndex;
16use super::walker::{is_indexable_file, language_for_extension, walk_indexable, MAX_FILE_BYTES};
17use super::words::WordIndex;
18
19pub struct IndexState {
22 pub root: PathBuf,
24 pub files: HashMap<FileId, IndexedFile>,
26 pub path_to_id: HashMap<String, FileId>,
28 pub trigrams: TrigramIndex,
30 pub words: WordIndex,
32 pub deps: DepGraph,
34 pub last_built_unix_ms: i64,
36 pub git_head: Option<String>,
38 next_id: FileId,
39}
40
/// Counters describing what a call to `IndexState::build_from_root` did.
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug`/`Default` so
/// callers and tests can compare and duplicate outcomes directly.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct BuildOutcome {
    /// Number of visited files for which ingestion produced a file id.
    pub files_indexed: u64,
    /// Number of visited files that ingestion rejected (not indexable,
    /// unreadable, too large, or outside the root).
    pub files_skipped: u64,
}
50
51impl IndexState {
52 pub fn build_from_root(root: &Path) -> (Self, BuildOutcome) {
55 let canonical_root = canonicalize(root);
56 let mut state = IndexState {
57 root: canonical_root.clone(),
58 files: HashMap::new(),
59 path_to_id: HashMap::new(),
60 trigrams: TrigramIndex::new(),
61 words: WordIndex::new(),
62 deps: DepGraph::new(),
63 last_built_unix_ms: now_unix_ms(),
64 git_head: read_git_head(&canonical_root),
65 next_id: 1,
66 };
67 let mut outcome = BuildOutcome::default();
68 let mut to_resolve: Vec<(FileId, String)> = Vec::new();
69 walk_indexable(&canonical_root, |abs| match state.ingest(abs) {
70 Some(file_id) => {
71 outcome.files_indexed += 1;
72 if let Some(file) = state.files.get(&file_id) {
73 to_resolve.push((file_id, file.relative_path.clone()));
74 }
75 }
76 None => {
77 outcome.files_skipped += 1;
78 }
79 });
80 for (id, rel) in to_resolve {
81 state.rebuild_deps(id, &rel);
82 }
83 (state, outcome)
84 }
85
86 fn ingest(&mut self, abs: &Path) -> Option<FileId> {
87 if !is_indexable_file(abs) {
88 return None;
89 }
90 let metadata = std::fs::metadata(abs).ok()?;
91 if metadata.len() > MAX_FILE_BYTES {
92 return None;
93 }
94 let content = std::fs::read_to_string(abs).ok()?;
95 if content.len() > MAX_FILE_BYTES as usize {
96 return None;
97 }
98 let rel = relative_path(&self.root, abs)?;
99 let hash = fnv1a64(content.as_bytes());
100 let id = match self.path_to_id.get(&rel) {
101 Some(existing_id) => {
102 if let Some(file) = self.files.get(existing_id) {
103 if file.content_hash == hash {
104 return Some(*existing_id);
105 }
106 }
107 *existing_id
108 }
109 None => {
110 let id = self.next_id;
111 self.next_id = self.next_id.checked_add(1).expect("FileId overflow");
112 self.path_to_id.insert(rel.clone(), id);
113 id
114 }
115 };
116
117 let ext = abs
118 .extension()
119 .and_then(|s| s.to_str())
120 .unwrap_or("")
121 .to_ascii_lowercase();
122 let language = language_for_extension(&ext).to_string();
123 let imports = imports::extract_imports(&content, &language);
124 let mtime_ms = metadata
125 .modified()
126 .ok()
127 .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
128 .map(|d| d.as_millis() as i64)
129 .unwrap_or(0);
130 let line_count = if content.is_empty() {
131 0
132 } else {
133 content.split('\n').count() as u32
134 };
135
136 let file = IndexedFile {
137 id,
138 relative_path: rel,
139 language,
140 size_bytes: content.len() as u64,
141 line_count,
142 content_hash: hash,
143 mtime_ms,
144 symbols: Vec::new(),
145 imports,
146 };
147 self.trigrams.index_file(id, &content);
148 self.words.index_file(id, &content);
149 self.files.insert(id, file);
150 Some(id)
151 }
152
153 fn rebuild_deps(&mut self, id: FileId, relative_path: &str) {
154 let Some(file) = self.files.get(&id).cloned() else {
155 return;
156 };
157 let resolved = imports::resolve(
158 &file.imports,
159 relative_path,
160 &file.language,
161 &self.path_to_id,
162 );
163 self.deps
164 .set_edges(id, resolved.resolved, resolved.unresolved);
165 }
166
167 pub fn lookup_path(&self, raw: &str) -> Option<FileId> {
170 if let Some(id) = self.path_to_id.get(raw) {
171 return Some(*id);
172 }
173 let path = Path::new(raw);
174 if path.is_absolute() {
175 if let Some(rel) = relative_path(&self.root, path) {
176 if let Some(id) = self.path_to_id.get(&rel) {
177 return Some(*id);
178 }
179 }
180 }
181 None
182 }
183
184 pub fn estimated_bytes(&self) -> usize {
187 let file_bytes: usize = self
188 .files
189 .values()
190 .map(|f| f.relative_path.len() + f.imports.iter().map(|s| s.len()).sum::<usize>() + 64)
191 .sum();
192 self.trigrams.estimated_bytes() + self.words.estimated_bytes() + file_bytes
193 }
194}
195
/// Current wall-clock time in milliseconds since the Unix epoch.
///
/// Returns 0 if the system clock reports a time before the epoch, and
/// saturates at `i64::MAX` rather than wrapping if the value does not fit.
fn now_unix_ms() -> i64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|d| i64::try_from(d.as_millis()).unwrap_or(i64::MAX))
        .unwrap_or(0)
}
202
/// Resolves `root` to its canonical form on a best-effort basis.
///
/// If the filesystem cannot canonicalize the path (for example because it
/// does not exist), the path is returned exactly as given.
fn canonicalize(root: &Path) -> PathBuf {
    match std::fs::canonicalize(root) {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
206
/// Expresses `abs` relative to `root` as a `/`-separated string.
///
/// `abs` is canonicalized when possible (used as given otherwise) and must
/// then lie under `root`; returns `None` if it does not. `root` is used as
/// passed, so callers should hand in an already-canonical root.
///
/// Only the platform's path separator is rewritten to `/`. On Unix a
/// backslash is an ordinary file-name character and is preserved, so
/// `a\b.rs` is not confused with `a/b.rs`.
pub(crate) fn relative_path(root: &Path, abs: &Path) -> Option<String> {
    let canonical_abs = std::fs::canonicalize(abs).unwrap_or_else(|_| abs.to_path_buf());
    let stripped = canonical_abs.strip_prefix(root).ok()?;
    let text = stripped.to_string_lossy();
    if std::path::MAIN_SEPARATOR == '/' {
        Some(text.into_owned())
    } else {
        Some(text.replace(std::path::MAIN_SEPARATOR, "/"))
    }
}
214
/// Reads the current Git `HEAD` of the workspace, best effort.
///
/// Returns `None` when `<workspace_root>/.git/HEAD` cannot be read.
/// For a detached `HEAD` the trimmed file content is returned. For a symbolic
/// `HEAD` (`ref: <name>`), the referenced commit id is looked up first as a
/// loose ref file and then in `.git/packed-refs`; if neither has it (for
/// example on a branch with no commits yet), the trimmed `ref: ...` line itself is
/// returned.
fn read_git_head(workspace_root: &Path) -> Option<String> {
    let git_dir = workspace_root.join(".git");
    let txt = std::fs::read_to_string(git_dir.join("HEAD")).ok()?;
    let line = txt.trim();
    if let Some(ref_target) = line.strip_prefix("ref: ") {
        if let Ok(sha) = std::fs::read_to_string(git_dir.join(ref_target)) {
            return Some(sha.trim().to_string());
        }
        // After `git gc` / `git pack-refs` the branch may exist only as a
        // "<sha> <refname>" line in packed-refs. Header (`# ...`) and peeled
        // (`^<sha>`) lines never match a ref name and are skipped naturally.
        if let Ok(packed) = std::fs::read_to_string(git_dir.join("packed-refs")) {
            let packed_sha = packed.lines().find_map(|entry| {
                let (sha, name) = entry.split_once(' ')?;
                (name.trim() == ref_target).then(|| sha.to_string())
            });
            if packed_sha.is_some() {
                return packed_sha;
            }
        }
    }
    Some(line.to_string())
}
227
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Writes `body` to `rel` under `root`, creating parent directories first.
    fn write_file(root: &Path, rel: &str, body: &str) {
        let target = root.join(rel);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(target, body).unwrap();
    }

    /// Two Rust files are indexed and both receive ids. The assertion pins
    /// that the `use crate::...` import produces no resolved edge for
    /// `main.rs`.
    #[test]
    fn build_indexes_files_and_resolves_imports() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_file(
            root,
            "src/main.rs",
            "use crate::util::helper;\nfn main() {}\n",
        );
        write_file(root, "src/util.rs", "pub fn helper() {}");

        let (state, outcome) = IndexState::build_from_root(root);

        assert_eq!(outcome.files_indexed, 2);
        assert_eq!(state.files.len(), 2);
        let main_id = state.path_to_id["src/main.rs"];
        // Indexing panics if the path is missing, so this checks util.rs was indexed.
        let _util_id = state.path_to_id["src/util.rs"];
        assert_eq!(state.deps.imports_of(main_id), Vec::<FileId>::new());
    }

    /// A relative TypeScript import resolves to the sibling file, and the
    /// edge is visible from both directions.
    #[test]
    fn typescript_imports_get_resolved() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_file(
            root,
            "src/index.ts",
            "import { helper } from \"./util\";\n",
        );
        write_file(root, "src/util.ts", "export function helper() {}");

        let (state, _) = IndexState::build_from_root(root);

        let importer = state.path_to_id["src/index.ts"];
        let imported = state.path_to_id["src/util.ts"];
        assert_eq!(state.deps.imports_of(importer), vec![imported]);
        assert_eq!(state.deps.importers_of(imported), vec![importer]);
    }

    /// An absolute path under the root resolves to the same id as its
    /// root-relative key.
    #[test]
    fn lookup_path_handles_absolute_paths() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        write_file(root, "a/b/c.py", "x = 1\n");

        let (state, _) = IndexState::build_from_root(root);

        let absolute = root.join("a/b/c.py");
        let found = state.lookup_path(absolute.to_str().unwrap()).unwrap();
        assert_eq!(state.path_to_id["a/b/c.py"], found);
    }
}