1use crate::cli::IndexArgs;
12use crate::core::config::Config;
13use crate::core::error::Result;
14use crate::core::project::Project;
15use crate::index::{IndexWriter, TantivyIndex};
16use crate::parse::{chunk_file, Chunk};
17use crate::trace::{
18 build_and_save_index, detect_language, find_dead_symbols, is_treesitter_supported, load_index,
19 snapshots::create_snapshot, trace_index_path, SemanticIndex,
20};
21use ignore::WalkBuilder;
22use rayon::prelude::*;
23use std::collections::{HashMap, HashSet};
24use std::env;
25use std::path::PathBuf;
26use std::sync::atomic::{AtomicUsize, Ordering};
27use std::time::Instant;
28use tracing::{debug, info};
29
/// Number of chunk documents written between intermediate index commits;
/// bounds the writer's in-memory buffer during large indexing runs.
const COMMIT_BATCH_SIZE: usize = 5000;
32
33pub fn run(args: IndexArgs) -> Result<()> {
35 let project_path = args
37 .project
38 .unwrap_or_else(|| env::current_dir().expect("Failed to get current directory"));
39
40 let project = Project::detect(&project_path)?;
42 info!(project = %project.name, root = %project.root.display(), "Indexing project");
43
44 let start = Instant::now();
45
46 let config = Config::load()?;
48
49 let index = if args.force {
51 TantivyIndex::delete(&project.root)?;
52 TantivyIndex::open_or_create(&project.root)?
53 } else {
54 TantivyIndex::open_or_create(&project.root)?
55 };
56
57 let walker = WalkBuilder::new(&project.root)
61 .hidden(true)
62 .git_ignore(true)
63 .git_global(true)
64 .git_exclude(true)
65 .max_filesize(Some(config.index.max_file_size))
66 .build();
67
68 let ignore_patterns = config.ignore.patterns.clone();
69
70 let file_paths: Vec<PathBuf> = walker
71 .flatten()
72 .filter_map(|entry| {
73 let path = entry.path();
74 if path.is_dir() {
75 return None;
76 }
77 if !is_code_file(path) {
78 return None;
79 }
80 if should_ignore(path, &ignore_patterns) {
81 debug!(path = %path.display(), "Skipping ignored file");
82 return None;
83 }
84 Some(path.to_path_buf())
85 })
86 .collect();
87
88 let total_files = file_paths.len();
89 info!(files = total_files, "Found files to index");
90
91 let file_count = AtomicUsize::new(0);
96 let chunk_count = AtomicUsize::new(0);
97
98 let batch_size = 500; let mut writer = IndexWriter::new(&index)?;
101 let mut total_chunks_written = 0usize;
102
103 for batch in file_paths.chunks(batch_size) {
104 let batch_chunks: Vec<Chunk> = batch
106 .par_iter()
107 .filter_map(|path| {
108 let content = match std::fs::read_to_string(path) {
109 Ok(c) => c,
110 Err(e) => {
111 debug!(path = %path.display(), error = %e, "Failed to read file");
112 return None;
113 }
114 };
115
116 file_count.fetch_add(1, Ordering::Relaxed);
117 let chunks = chunk_file(path, &content);
118 chunk_count.fetch_add(chunks.len(), Ordering::Relaxed);
119
120 Some(chunks)
122 })
123 .flatten()
124 .collect();
125
126 for chunk in &batch_chunks {
128 writer.add_chunk(chunk)?;
129 total_chunks_written += 1;
130
131 if total_chunks_written % COMMIT_BATCH_SIZE == 0 {
133 debug!(chunks = total_chunks_written, "Intermediate commit");
134 writer = writer.commit_and_reopen(&index)?;
135 }
136 }
137 }
139
140 writer.commit()?;
142
143 let tantivy_elapsed = start.elapsed();
144 let final_file_count = file_count.load(Ordering::Relaxed);
145 let final_chunk_count = chunk_count.load(Ordering::Relaxed);
146
147 let chunks_per_sec = if tantivy_elapsed.as_secs_f64() > 0.0 {
148 final_chunk_count as f64 / tantivy_elapsed.as_secs_f64()
149 } else {
150 0.0
151 };
152
153 info!(
154 files = final_file_count,
155 chunks = final_chunk_count,
156 elapsed_ms = tantivy_elapsed.as_millis(),
157 chunks_per_sec = chunks_per_sec as u64,
158 "Text index complete"
159 );
160
161 println!(
162 "Text index: {} files ({} chunks) in {:.2}s",
163 final_file_count,
164 final_chunk_count,
165 tantivy_elapsed.as_secs_f64(),
166 );
167
168 let trace_start = Instant::now();
172 info!("Building semantic trace index...");
173
174 let semantic_files: Vec<(PathBuf, String)> = file_paths
177 .par_iter()
178 .filter_map(|path| {
179 let lang = detect_language(path);
180 if !is_treesitter_supported(lang) {
181 return None;
182 }
183 match std::fs::read_to_string(path) {
184 Ok(content) => Some((path.clone(), content)),
185 Err(_) => None,
186 }
187 })
188 .collect();
189
190 let semantic_file_count = semantic_files.len();
191
192 if semantic_file_count > 0 {
193 match build_and_save_index(&project.root, &semantic_files) {
194 Ok(stats) => {
195 let trace_elapsed = trace_start.elapsed();
196 info!(
197 files = stats.files,
198 symbols = stats.symbols,
199 tokens = stats.tokens,
200 edges = stats.edges,
201 elapsed_ms = trace_elapsed.as_millis(),
202 "Trace index complete"
203 );
204 println!(
205 "Trace index: {} files ({} symbols, {} edges) in {:.2}s",
206 stats.files,
207 stats.symbols,
208 stats.edges,
209 trace_elapsed.as_secs_f64(),
210 );
211 }
212 Err(e) => {
213 tracing::warn!("Failed to build trace index: {}", e);
214 println!("Warning: Trace index build failed: {}", e);
215 }
216 }
217 } else {
218 println!("Trace index: skipped (no supported languages)");
219 }
220
221 let total_elapsed = start.elapsed();
222 println!(
223 "\nTotal: {:.2}s ({:.0} chunks/sec)",
224 total_elapsed.as_secs_f64(),
225 chunks_per_sec
226 );
227
228 if semantic_file_count > 0 {
233 let trace_path = trace_index_path(&project.root);
234 if trace_path.exists() {
235 match load_index(&trace_path) {
236 Ok(index) => {
237 let dead_symbols = find_dead_symbols(&index);
238 let cycles_count = count_cycles(&index) as u32;
239
240 match create_snapshot(
241 &index,
242 &project.root,
243 &project.name,
244 &dead_symbols.iter().map(|s| s.id).collect(),
245 cycles_count,
246 None, ) {
248 Ok(_) => {
249 debug!("Auto-created snapshot after indexing");
250 }
251 Err(e) => {
252 debug!("Failed to create snapshot: {}", e);
253 }
254 }
255 }
256 Err(e) => {
257 debug!("Failed to load trace index for snapshot: {}", e);
258 }
259 }
260 }
261 }
262
263 Ok(())
264}
265
266fn count_cycles(index: &SemanticIndex) -> usize {
268 let mut graph: HashMap<u16, HashSet<u16>> = HashMap::new();
269
270 for edge in &index.edges {
271 if let (Some(from_sym), Some(to_sym)) =
272 (index.symbol(edge.from_symbol), index.symbol(edge.to_symbol))
273 {
274 if from_sym.file_id != to_sym.file_id {
275 graph
276 .entry(from_sym.file_id)
277 .or_default()
278 .insert(to_sym.file_id);
279 }
280 }
281 }
282
283 let mut cycles = 0;
284 let mut visited = HashSet::new();
285 let mut rec_stack = HashSet::new();
286
287 for &node in graph.keys() {
288 if !visited.contains(&node) {
289 cycles += count_cycles_dfs(node, &graph, &mut visited, &mut rec_stack);
290 }
291 }
292
293 cycles
294}
295
/// Recursive DFS step for [`count_cycles`]: returns how many back edges
/// are reachable from `node`. `rec_stack` holds the nodes on the current
/// DFS path; an edge pointing back into it closes a cycle.
fn count_cycles_dfs(
    node: u16,
    graph: &HashMap<u16, HashSet<u16>>,
    visited: &mut HashSet<u16>,
    rec_stack: &mut HashSet<u16>,
) -> usize {
    visited.insert(node);
    rec_stack.insert(node);

    let mut found = 0;
    if let Some(successors) = graph.get(&node) {
        for &next in successors {
            if rec_stack.contains(&next) {
                // `next` is an ancestor on the current path: back edge.
                found += 1;
            } else if !visited.contains(&next) {
                found += count_cycles_dfs(next, graph, visited, rec_stack);
            }
            // Visited but not on the stack: cross/forward edge, no cycle.
        }
    }

    // Backtrack: `node` leaves the current DFS path.
    rec_stack.remove(&node);
    found
}
320
/// Returns `true` when `path` carries a file extension belonging to one of
/// the source-code languages the indexer handles. The comparison is
/// case-insensitive; files without a (UTF-8) extension are rejected.
fn is_code_file(path: &std::path::Path) -> bool {
    const CODE_EXTENSIONS: &[&str] = &[
        "ts", "tsx", "js", "jsx", "mjs", "cjs", "py", "pyi", "rs", "go", "java", "kt",
        "kts", "scala", "rb", "php", "c", "h", "cpp", "cc", "cxx", "hpp", "cs", "swift",
        "ex", "exs", "erl", "hrl", "hs", "ml", "mli", "lua", "sh", "bash", "zsh", "sql",
        "vue", "svelte",
    ];

    path.extension()
        .and_then(|ext| ext.to_str())
        .map_or(false, |ext| {
            CODE_EXTENSIONS.contains(&ext.to_lowercase().as_str())
        })
}
370
/// Returns `true` when `path` matches any user-configured ignore pattern.
///
/// Matching is a deliberately simple substring test: leading/trailing `*`
/// are stripped and the remainder is searched for anywhere in the path.
/// NOTE(review): this is not real glob matching — patterns like
/// `src/*.rs` match more loosely than a glob would.
fn should_ignore(path: &std::path::Path, patterns: &[String]) -> bool {
    let path_str = path.to_string_lossy();

    patterns.iter().any(|pattern| {
        // A stray empty config entry would trim to "" and `contains("")`
        // is always true — it must not ignore every file. A bare "*"
        // also trims to "" but intentionally keeps its match-all meaning.
        if pattern.is_empty() {
            return false;
        }
        path_str.contains(pattern.trim_matches('*'))
    })
}