1use crate::index::chunker::CodeChunk;
7use crate::index::incremental::ChangeDetector;
8use crate::index::parser::{CodeParser, ParseResult};
9use crate::index::resolver::{ReferenceResolver, ResolvedEdge};
10use crate::index::symbol::{Reference, Symbol};
11use ignore::WalkBuilder;
12use std::collections::HashSet;
13use std::path::{Path, PathBuf};
14
/// Snapshot of indexing progress, emitted after each successfully parsed file.
///
/// The indexer sends values of this type over a `tokio::sync::broadcast`
/// channel, which is why the type is `Clone`.
#[derive(Clone, Debug)]
pub struct IndexProgress {
    /// Number of files with a supported extension encountered so far.
    pub files_scanned: usize,
    /// Number of files parsed successfully so far.
    pub files_parsed: usize,
    /// Running total of symbols extracted from the parsed files.
    pub total_symbols: usize,
    /// Path (relative to the index root when possible) of the file that was
    /// just parsed.
    pub current_file: String,
}
27
28#[derive(Debug)]
30pub struct IndexResult {
31 pub files_scanned: usize,
33 pub files_parsed: usize,
35 pub files_skipped: usize,
37 pub total_symbols: usize,
39 pub total_references: usize,
41 pub total_chunks: usize,
43 pub parse_results: Vec<ParseResult>,
45}
46
47#[derive(Debug)]
49pub struct IndexAndResolveResult {
50 pub index: IndexResult,
52 pub symbols: Vec<Symbol>,
54 pub references: Vec<Reference>,
56 pub chunks: Vec<CodeChunk>,
58 pub file_paths: HashSet<String>,
60 pub edges: Vec<ResolvedEdge>,
62 pub root_path: PathBuf,
65}
66
67pub struct Indexer {
72 parser: CodeParser,
73 change_detector: ChangeDetector,
74}
75
76impl Indexer {
77 pub fn new() -> Self {
79 Self {
80 parser: CodeParser::new(),
81 change_detector: ChangeDetector::new(),
82 }
83 }
84
85 pub fn with_change_detector(change_detector: ChangeDetector) -> Self {
87 Self {
88 parser: CodeParser::new(),
89 change_detector,
90 }
91 }
92
93 pub fn change_detector(&self) -> &ChangeDetector {
95 &self.change_detector
96 }
97
98 pub fn change_detector_mut(&mut self) -> &mut ChangeDetector {
100 &mut self.change_detector
101 }
102
103 pub fn index_directory(
109 &mut self,
110 root: &Path,
111 ) -> Result<IndexResult, codemem_core::CodememError> {
112 self.index_directory_inner(root, None)
113 }
114
115 pub fn index_directory_with_progress(
120 &mut self,
121 root: &Path,
122 tx: Option<&tokio::sync::broadcast::Sender<IndexProgress>>,
123 ) -> Result<IndexResult, codemem_core::CodememError> {
124 self.index_directory_inner(root, tx)
125 }
126
127 fn index_directory_inner(
129 &mut self,
130 root: &Path,
131 tx: Option<&tokio::sync::broadcast::Sender<IndexProgress>>,
132 ) -> Result<IndexResult, codemem_core::CodememError> {
133 let mut files_scanned = 0usize;
134 let mut files_parsed = 0usize;
135 let mut files_skipped = 0usize;
136 let mut total_symbols = 0usize;
137 let mut total_references = 0usize;
138 let mut total_chunks = 0usize;
139 let mut parse_results = Vec::new();
140
141 let walker = WalkBuilder::new(root)
142 .hidden(true) .git_ignore(true) .git_global(true) .git_exclude(true) .build();
147
148 for entry in walker {
149 let entry = match entry {
150 Ok(e) => e,
151 Err(err) => {
152 tracing::warn!("Walk error: {}", err);
153 continue;
154 }
155 };
156
157 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
159 continue;
160 }
161
162 let path = entry.path();
163
164 let ext = match path.extension().and_then(|e| e.to_str()) {
166 Some(e) => e,
167 None => continue,
168 };
169
170 if !self.parser.supports_extension(ext) {
171 continue;
172 }
173
174 files_scanned += 1;
175
176 let content = match std::fs::read(path) {
178 Ok(c) => c,
179 Err(err) => {
180 tracing::warn!("Failed to read {}: {}", path.display(), err);
181 continue;
182 }
183 };
184
185 let rel_path = path.strip_prefix(root).unwrap_or(path);
187 let path_str = rel_path.to_string_lossy().to_string();
188
189 let (changed, hash) = self.change_detector.check_changed(&path_str, &content);
191 if !changed {
192 files_skipped += 1;
193 continue;
194 }
195
196 match self.parser.parse_file(&path_str, &content) {
198 Some(result) => {
199 total_symbols += result.symbols.len();
200 total_references += result.references.len();
201 total_chunks += result.chunks.len();
202 files_parsed += 1;
203
204 self.change_detector.record_hash(&path_str, hash);
206
207 parse_results.push(result);
208
209 if let Some(tx) = tx {
211 let _ = tx.send(IndexProgress {
212 files_scanned,
213 files_parsed,
214 total_symbols,
215 current_file: path_str.clone(),
216 });
217 }
218 }
219 None => {
220 tracing::warn!("Failed to parse {}", path_str);
221 }
222 }
223 }
224
225 tracing::info!(
226 "Indexed {}: {} scanned, {} parsed, {} skipped, {} symbols, {} references, {} chunks",
227 root.display(),
228 files_scanned,
229 files_parsed,
230 files_skipped,
231 total_symbols,
232 total_references,
233 total_chunks,
234 );
235
236 Ok(IndexResult {
237 files_scanned,
238 files_parsed,
239 files_skipped,
240 total_symbols,
241 total_references,
242 total_chunks,
243 parse_results,
244 })
245 }
246
247 pub fn index_and_resolve(
254 &mut self,
255 root: &Path,
256 ) -> Result<IndexAndResolveResult, codemem_core::CodememError> {
257 let result = self.index_directory(root)?;
258
259 let mut all_symbols = Vec::new();
260 let mut all_references = Vec::new();
261 let mut all_chunks = Vec::new();
262 let mut file_paths = HashSet::new();
263
264 let IndexResult {
266 files_scanned,
267 files_parsed,
268 files_skipped,
269 total_symbols,
270 total_references,
271 total_chunks,
272 parse_results,
273 } = result;
274
275 for pr in parse_results {
276 file_paths.insert(pr.file_path);
277 all_symbols.extend(pr.symbols);
278 all_references.extend(pr.references);
279 all_chunks.extend(pr.chunks);
280 }
281
282 let mut resolver = ReferenceResolver::new();
283 resolver.add_symbols(&all_symbols);
284 resolver.add_imports(&all_references);
285 let edges = resolver.resolve_all(&all_references);
286
287 let root_path = std::fs::canonicalize(root).unwrap_or_else(|_| root.to_path_buf());
289
290 Ok(IndexAndResolveResult {
291 index: IndexResult {
292 files_scanned,
293 files_parsed,
294 files_skipped,
295 total_symbols,
296 total_references,
297 total_chunks,
298 parse_results: Vec::new(),
299 },
300 symbols: all_symbols,
301 references: all_references,
302 chunks: all_chunks,
303 file_paths,
304 edges,
305 root_path,
306 })
307 }
308}
309
310impl Default for Indexer {
311 fn default() -> Self {
312 Self::new()
313 }
314}
315
316#[cfg(test)]
317#[path = "tests/indexer_tests.rs"]
318mod tests;