1use crate::cli::IndexArgs;
11use crate::core::config::Config;
12use crate::core::error::Result;
13use crate::core::project::Project;
14use crate::index::{IndexWriter, TantivyIndex};
15use crate::parse::{chunk_file, Chunk};
16use ignore::WalkBuilder;
17use rayon::prelude::*;
18use std::env;
19use std::path::PathBuf;
20use std::sync::atomic::{AtomicUsize, Ordering};
21use std::time::Instant;
22use tracing::{debug, info};
23
/// Number of chunks added to the index writer between intermediate commits.
///
/// `run` commits and reopens the writer every time the running total of
/// written chunks reaches a multiple of this value.
const COMMIT_BATCH_SIZE: usize = 5_000;
26
27pub fn run(args: IndexArgs) -> Result<()> {
29 let project_path = args
31 .project
32 .unwrap_or_else(|| env::current_dir().expect("Failed to get current directory"));
33
34 let project = Project::detect(&project_path)?;
36 info!(project = %project.name, root = %project.root.display(), "Indexing project");
37
38 let start = Instant::now();
39
40 let config = Config::load()?;
42
43 let index = if args.force {
45 TantivyIndex::delete(&project.root)?;
46 TantivyIndex::open_or_create(&project.root)?
47 } else {
48 TantivyIndex::open_or_create(&project.root)?
49 };
50
51 let walker = WalkBuilder::new(&project.root)
55 .hidden(true)
56 .git_ignore(true)
57 .git_global(true)
58 .git_exclude(true)
59 .max_filesize(Some(config.index.max_file_size))
60 .build();
61
62 let ignore_patterns = config.ignore.patterns.clone();
63
64 let file_paths: Vec<PathBuf> = walker
65 .flatten()
66 .filter_map(|entry| {
67 let path = entry.path();
68 if path.is_dir() {
69 return None;
70 }
71 if !is_code_file(path) {
72 return None;
73 }
74 if should_ignore(path, &ignore_patterns) {
75 debug!(path = %path.display(), "Skipping ignored file");
76 return None;
77 }
78 Some(path.to_path_buf())
79 })
80 .collect();
81
82 let total_files = file_paths.len();
83 info!(files = total_files, "Found files to index");
84
85 let file_count = AtomicUsize::new(0);
90 let chunk_count = AtomicUsize::new(0);
91
92 let batch_size = 500; let mut writer = IndexWriter::new(&index)?;
95 let mut total_chunks_written = 0usize;
96
97 for batch in file_paths.chunks(batch_size) {
98 let batch_chunks: Vec<Chunk> = batch
100 .par_iter()
101 .filter_map(|path| {
102 let content = match std::fs::read_to_string(path) {
103 Ok(c) => c,
104 Err(e) => {
105 debug!(path = %path.display(), error = %e, "Failed to read file");
106 return None;
107 }
108 };
109
110 file_count.fetch_add(1, Ordering::Relaxed);
111 let chunks = chunk_file(path, &content);
112 chunk_count.fetch_add(chunks.len(), Ordering::Relaxed);
113
114 Some(chunks)
116 })
117 .flatten()
118 .collect();
119
120 for chunk in &batch_chunks {
122 writer.add_chunk(chunk)?;
123 total_chunks_written += 1;
124
125 if total_chunks_written % COMMIT_BATCH_SIZE == 0 {
127 debug!(chunks = total_chunks_written, "Intermediate commit");
128 writer = writer.commit_and_reopen(&index)?;
129 }
130 }
131 }
133
134 writer.commit()?;
136
137 let elapsed = start.elapsed();
138 let final_file_count = file_count.load(Ordering::Relaxed);
139 let final_chunk_count = chunk_count.load(Ordering::Relaxed);
140
141 let chunks_per_sec = if elapsed.as_secs_f64() > 0.0 {
142 final_chunk_count as f64 / elapsed.as_secs_f64()
143 } else {
144 0.0
145 };
146
147 info!(
148 files = final_file_count,
149 chunks = final_chunk_count,
150 elapsed_ms = elapsed.as_millis(),
151 chunks_per_sec = chunks_per_sec as u64,
152 "Indexing complete"
153 );
154
155 println!(
156 "Indexed {} files ({} chunks) in {:.2}s ({:.0} chunks/sec)",
157 final_file_count,
158 final_chunk_count,
159 elapsed.as_secs_f64(),
160 chunks_per_sec
161 );
162
163 Ok(())
164}
165
/// Returns `true` when `path` has one of the recognised source-code
/// extensions.
///
/// The comparison is case-insensitive (the extension is lowercased first).
/// Paths without an extension, or whose extension is not valid UTF-8, are
/// never treated as code files.
fn is_code_file(path: &std::path::Path) -> bool {
    /// Lowercase extensions that count as source code.
    const CODE_EXTENSIONS: &[&str] = &[
        "ts", "tsx", "js", "jsx", "mjs", "cjs", "py", "pyi", "rs", "go", "java", "kt", "kts",
        "scala", "rb", "php", "c", "h", "cpp", "cc", "cxx", "hpp", "cs", "swift", "ex", "exs",
        "erl", "hrl", "hs", "ml", "mli", "lua", "sh", "bash", "zsh", "sql", "vue", "svelte",
    ];

    path.extension()
        .and_then(|raw| raw.to_str())
        .map(|raw| raw.to_lowercase())
        .map_or(false, |lowered| CODE_EXTENSIONS.contains(&lowered.as_str()))
}
215
/// Returns `true` when `path` matches any of the configured ignore patterns.
///
/// Matching is a plain substring test: leading and trailing `*` characters
/// are stripped from each pattern and the remainder is searched for in the
/// lossily-converted path string. A pattern such as `*.min.js` therefore
/// matches any path containing `.min.js`, and `node_modules` matches any path
/// containing that text.
///
/// Empty patterns are skipped. An empty string is a substring of every path,
/// so a blank configuration entry would otherwise exclude the whole project.
/// Wildcard-only patterns such as `*` still match every path.
fn should_ignore(path: &std::path::Path, patterns: &[String]) -> bool {
    let path_str = path.to_string_lossy();

    patterns
        .iter()
        // A blank entry carries no intent to ignore anything.
        .filter(|pattern| !pattern.is_empty())
        .any(|pattern| path_str.contains(pattern.trim_matches('*')))
}