1use crate::index::chunker::CodeChunk;
7use crate::index::incremental::ChangeDetector;
8use crate::index::parser::{CodeParser, ParseResult};
9use crate::index::resolver::{ReferenceResolver, ResolvedEdge, UnresolvedRef};
10use crate::index::symbol::{Reference, Symbol};
11use ignore::WalkBuilder;
12use std::collections::HashSet;
13use std::path::{Path, PathBuf};
14
/// Snapshot of indexing progress, emitted on the optional broadcast channel
/// each time a file has been parsed successfully.
#[derive(Debug, Clone)]
pub struct IndexProgress {
    /// Number of files with a supported extension encountered so far.
    pub files_scanned: usize,
    /// Number of files parsed successfully so far.
    pub files_parsed: usize,
    /// Running total of symbols extracted from the parsed files.
    pub total_symbols: usize,
    /// Path (relative to the indexed root when it can be stripped) of the
    /// file that was just parsed.
    pub current_file: String,
}
27
28#[derive(Debug)]
30pub struct IndexResult {
31 pub files_scanned: usize,
33 pub files_parsed: usize,
35 pub files_skipped: usize,
37 pub total_symbols: usize,
39 pub total_references: usize,
41 pub total_chunks: usize,
43 pub parse_results: Vec<ParseResult>,
45}
46
47#[derive(Debug)]
49pub struct IndexAndResolveResult {
50 pub index: IndexResult,
52 pub symbols: Vec<Symbol>,
54 pub references: Vec<Reference>,
56 pub chunks: Vec<CodeChunk>,
58 pub file_paths: HashSet<String>,
60 pub edges: Vec<ResolvedEdge>,
62 pub unresolved: Vec<UnresolvedRef>,
65 pub root_path: PathBuf,
68 pub scip_build: Option<super::scip::graph_builder::ScipBuildResult>,
70}
71
72pub struct Indexer {
77 parser: CodeParser,
78 change_detector: ChangeDetector,
79}
80
81impl Indexer {
82 pub fn new() -> Self {
84 Self {
85 parser: CodeParser::new(),
86 change_detector: ChangeDetector::new(),
87 }
88 }
89
90 pub fn with_change_detector(change_detector: ChangeDetector) -> Self {
92 Self {
93 parser: CodeParser::new(),
94 change_detector,
95 }
96 }
97
98 pub fn change_detector(&self) -> &ChangeDetector {
100 &self.change_detector
101 }
102
103 pub fn change_detector_mut(&mut self) -> &mut ChangeDetector {
105 &mut self.change_detector
106 }
107
108 pub fn index_directory(
114 &mut self,
115 root: &Path,
116 ) -> Result<IndexResult, codemem_core::CodememError> {
117 self.index_directory_inner(root, None)
118 }
119
120 pub fn index_directory_with_progress(
125 &mut self,
126 root: &Path,
127 tx: Option<&tokio::sync::broadcast::Sender<IndexProgress>>,
128 ) -> Result<IndexResult, codemem_core::CodememError> {
129 self.index_directory_inner(root, tx)
130 }
131
132 fn index_directory_inner(
134 &mut self,
135 root: &Path,
136 tx: Option<&tokio::sync::broadcast::Sender<IndexProgress>>,
137 ) -> Result<IndexResult, codemem_core::CodememError> {
138 let mut files_scanned = 0usize;
139 let mut files_parsed = 0usize;
140 let mut files_skipped = 0usize;
141 let mut total_symbols = 0usize;
142 let mut total_references = 0usize;
143 let mut total_chunks = 0usize;
144 let mut parse_results = Vec::new();
145
146 let walker = WalkBuilder::new(root)
147 .hidden(true) .git_ignore(true) .git_global(true) .git_exclude(true) .build();
152
153 for entry in walker {
154 let entry = match entry {
155 Ok(e) => e,
156 Err(err) => {
157 tracing::warn!("Walk error: {}", err);
158 continue;
159 }
160 };
161
162 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
164 continue;
165 }
166
167 let path = entry.path();
168
169 let ext = match path.extension().and_then(|e| e.to_str()) {
171 Some(e) => e,
172 None => continue,
173 };
174
175 if !self.parser.supports_extension(ext) {
176 continue;
177 }
178
179 files_scanned += 1;
180
181 let content = match std::fs::read(path) {
183 Ok(c) => c,
184 Err(err) => {
185 tracing::warn!("Failed to read {}: {}", path.display(), err);
186 continue;
187 }
188 };
189
190 let rel_path = path.strip_prefix(root).unwrap_or(path);
192 let path_str = rel_path.to_string_lossy().to_string();
193
194 let (changed, hash) = self.change_detector.check_changed(&path_str, &content);
196 if !changed {
197 files_skipped += 1;
198 continue;
199 }
200
201 match self.parser.parse_file(&path_str, &content) {
203 Some(result) => {
204 total_symbols += result.symbols.len();
205 total_references += result.references.len();
206 total_chunks += result.chunks.len();
207 files_parsed += 1;
208
209 self.change_detector.record_hash(&path_str, hash);
211
212 parse_results.push(result);
213
214 if let Some(tx) = tx {
216 let _ = tx.send(IndexProgress {
217 files_scanned,
218 files_parsed,
219 total_symbols,
220 current_file: path_str.clone(),
221 });
222 }
223 }
224 None => {
225 tracing::warn!("Failed to parse {}", path_str);
226 }
227 }
228 }
229
230 tracing::info!(
231 "Indexed {}: {} scanned, {} parsed, {} skipped, {} symbols, {} references, {} chunks",
232 root.display(),
233 files_scanned,
234 files_parsed,
235 files_skipped,
236 total_symbols,
237 total_references,
238 total_chunks,
239 );
240
241 Ok(IndexResult {
242 files_scanned,
243 files_parsed,
244 files_skipped,
245 total_symbols,
246 total_references,
247 total_chunks,
248 parse_results,
249 })
250 }
251
252 pub fn index_and_resolve(
259 &mut self,
260 root: &Path,
261 ) -> Result<IndexAndResolveResult, codemem_core::CodememError> {
262 self.index_and_resolve_with_scip(root, None, None)
263 }
264
265 pub fn index_and_resolve_with_scip(
271 &mut self,
272 root: &Path,
273 scip_covered_files: Option<&HashSet<String>>,
274 scip_build: Option<super::scip::graph_builder::ScipBuildResult>,
275 ) -> Result<IndexAndResolveResult, codemem_core::CodememError> {
276 let result = self.index_directory(root)?;
277
278 let mut all_symbols = Vec::new();
279 let mut all_references = Vec::new();
280 let mut all_chunks = Vec::new();
281 let mut file_paths = HashSet::new();
282
283 let IndexResult {
285 files_scanned,
286 files_parsed,
287 files_skipped,
288 total_symbols,
289 total_references,
290 total_chunks,
291 parse_results,
292 } = result;
293
294 for pr in parse_results {
295 file_paths.insert(pr.file_path.clone());
296 if scip_covered_files.is_some_and(|s| s.contains(&pr.file_path)) {
299 all_chunks.extend(pr.chunks);
300 } else {
301 all_symbols.extend(pr.symbols);
302 all_references.extend(pr.references);
303 all_chunks.extend(pr.chunks);
304 }
305 }
306
307 let mut resolver = ReferenceResolver::new();
308 resolver.add_symbols(&all_symbols);
309 resolver.add_imports(&all_references);
310 let resolve_result = resolver.resolve_all_with_unresolved(&all_references);
311
312 let root_path = std::fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
314
315 Ok(IndexAndResolveResult {
316 index: IndexResult {
317 files_scanned,
318 files_parsed,
319 files_skipped,
320 total_symbols,
321 total_references,
322 total_chunks,
323 parse_results: Vec::new(),
324 },
325 symbols: all_symbols,
326 references: all_references,
327 chunks: all_chunks,
328 file_paths,
329 edges: resolve_result.edges,
330 unresolved: resolve_result.unresolved,
331 root_path,
332 scip_build,
333 })
334 }
335}
336
337impl Default for Indexer {
338 fn default() -> Self {
339 Self::new()
340 }
341}
342
343#[cfg(test)]
344#[path = "tests/indexer_tests.rs"]
345mod tests;