1use crate::index::chunker::CodeChunk;
7use crate::index::incremental::ChangeDetector;
8use crate::index::parser::{CodeParser, ParseResult};
9use crate::index::resolver::{ReferenceResolver, ResolvedEdge};
10use crate::index::symbol::{Reference, Symbol};
11use ignore::WalkBuilder;
12use std::collections::HashSet;
13use std::path::Path;
14
/// Progress snapshot emitted while a directory is being indexed.
///
/// A value is sent on the optional broadcast channel after each file that
/// parses successfully. The counters are cumulative for the current run.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct IndexProgress {
    /// Files with a supported extension encountered so far.
    pub files_scanned: usize,
    /// Files parsed successfully so far.
    pub files_parsed: usize,
    /// Running total of symbols extracted from the parsed files.
    pub total_symbols: usize,
    /// The file that was just parsed, relative to the index root when the
    /// root prefix can be stripped, otherwise the path as walked.
    pub current_file: String,
}
27
28#[derive(Debug)]
30pub struct IndexResult {
31 pub files_scanned: usize,
33 pub files_parsed: usize,
35 pub files_skipped: usize,
37 pub total_symbols: usize,
39 pub total_references: usize,
41 pub total_chunks: usize,
43 pub parse_results: Vec<ParseResult>,
45}
46
47#[derive(Debug)]
49pub struct IndexAndResolveResult {
50 pub index: IndexResult,
52 pub symbols: Vec<Symbol>,
54 pub references: Vec<Reference>,
56 pub chunks: Vec<CodeChunk>,
58 pub file_paths: HashSet<String>,
60 pub edges: Vec<ResolvedEdge>,
62}
63
64pub struct Indexer {
69 parser: CodeParser,
70 change_detector: ChangeDetector,
71}
72
73impl Indexer {
74 pub fn new() -> Self {
76 Self {
77 parser: CodeParser::new(),
78 change_detector: ChangeDetector::new(),
79 }
80 }
81
82 pub fn with_change_detector(change_detector: ChangeDetector) -> Self {
84 Self {
85 parser: CodeParser::new(),
86 change_detector,
87 }
88 }
89
90 pub fn change_detector(&self) -> &ChangeDetector {
92 &self.change_detector
93 }
94
95 pub fn change_detector_mut(&mut self) -> &mut ChangeDetector {
97 &mut self.change_detector
98 }
99
100 pub fn index_directory(
106 &mut self,
107 root: &Path,
108 ) -> Result<IndexResult, codemem_core::CodememError> {
109 self.index_directory_inner(root, None)
110 }
111
112 pub fn index_directory_with_progress(
117 &mut self,
118 root: &Path,
119 tx: Option<&tokio::sync::broadcast::Sender<IndexProgress>>,
120 ) -> Result<IndexResult, codemem_core::CodememError> {
121 self.index_directory_inner(root, tx)
122 }
123
124 fn index_directory_inner(
126 &mut self,
127 root: &Path,
128 tx: Option<&tokio::sync::broadcast::Sender<IndexProgress>>,
129 ) -> Result<IndexResult, codemem_core::CodememError> {
130 let mut files_scanned = 0usize;
131 let mut files_parsed = 0usize;
132 let mut files_skipped = 0usize;
133 let mut total_symbols = 0usize;
134 let mut total_references = 0usize;
135 let mut total_chunks = 0usize;
136 let mut parse_results = Vec::new();
137
138 let walker = WalkBuilder::new(root)
139 .hidden(true) .git_ignore(true) .git_global(true) .git_exclude(true) .build();
144
145 for entry in walker {
146 let entry = match entry {
147 Ok(e) => e,
148 Err(err) => {
149 tracing::warn!("Walk error: {}", err);
150 continue;
151 }
152 };
153
154 if !entry.file_type().is_some_and(|ft| ft.is_file()) {
156 continue;
157 }
158
159 let path = entry.path();
160
161 let ext = match path.extension().and_then(|e| e.to_str()) {
163 Some(e) => e,
164 None => continue,
165 };
166
167 if !self.parser.supports_extension(ext) {
168 continue;
169 }
170
171 files_scanned += 1;
172
173 let content = match std::fs::read(path) {
175 Ok(c) => c,
176 Err(err) => {
177 tracing::warn!("Failed to read {}: {}", path.display(), err);
178 continue;
179 }
180 };
181
182 let path_str = path.to_string_lossy().to_string();
183
184 let (changed, hash) = self.change_detector.check_changed(&path_str, &content);
186 if !changed {
187 files_skipped += 1;
188 continue;
189 }
190
191 match self.parser.parse_file(&path_str, &content) {
193 Some(result) => {
194 total_symbols += result.symbols.len();
195 total_references += result.references.len();
196 total_chunks += result.chunks.len();
197 files_parsed += 1;
198
199 self.change_detector.record_hash(&path_str, hash);
201
202 parse_results.push(result);
203
204 if let Some(tx) = tx {
206 let relative_path = path
207 .strip_prefix(root)
208 .unwrap_or(path)
209 .to_string_lossy()
210 .to_string();
211 let _ = tx.send(IndexProgress {
212 files_scanned,
213 files_parsed,
214 total_symbols,
215 current_file: relative_path,
216 });
217 }
218 }
219 None => {
220 tracing::warn!("Failed to parse {}", path_str);
221 }
222 }
223 }
224
225 tracing::info!(
226 "Indexed {}: {} scanned, {} parsed, {} skipped, {} symbols, {} references, {} chunks",
227 root.display(),
228 files_scanned,
229 files_parsed,
230 files_skipped,
231 total_symbols,
232 total_references,
233 total_chunks,
234 );
235
236 Ok(IndexResult {
237 files_scanned,
238 files_parsed,
239 files_skipped,
240 total_symbols,
241 total_references,
242 total_chunks,
243 parse_results,
244 })
245 }
246
247 pub fn index_and_resolve(
254 &mut self,
255 root: &Path,
256 ) -> Result<IndexAndResolveResult, codemem_core::CodememError> {
257 let result = self.index_directory(root)?;
258
259 let mut all_symbols = Vec::new();
260 let mut all_references = Vec::new();
261 let mut all_chunks = Vec::new();
262 let mut file_paths = HashSet::new();
263
264 let IndexResult {
266 files_scanned,
267 files_parsed,
268 files_skipped,
269 total_symbols,
270 total_references,
271 total_chunks,
272 parse_results,
273 } = result;
274
275 for pr in parse_results {
276 file_paths.insert(pr.file_path);
277 all_symbols.extend(pr.symbols);
278 all_references.extend(pr.references);
279 all_chunks.extend(pr.chunks);
280 }
281
282 let mut resolver = ReferenceResolver::new();
283 resolver.add_symbols(&all_symbols);
284 let edges = resolver.resolve_all(&all_references);
285
286 Ok(IndexAndResolveResult {
287 index: IndexResult {
288 files_scanned,
289 files_parsed,
290 files_skipped,
291 total_symbols,
292 total_references,
293 total_chunks,
294 parse_results: Vec::new(),
295 },
296 symbols: all_symbols,
297 references: all_references,
298 chunks: all_chunks,
299 file_paths,
300 edges,
301 })
302 }
303}
304
305impl Default for Indexer {
306 fn default() -> Self {
307 Self::new()
308 }
309}
310
311#[cfg(test)]
312#[path = "tests/indexer_tests.rs"]
313mod tests;