1use indicatif::{ProgressBar, ProgressStyle};
11use std::path::PathBuf;
12use std::time::Instant;
13
/// Renders a byte count as a short human-readable string.
///
/// Uses binary multiples (1 KB = 1024 B). Values of at least 1 KB are shown
/// with one decimal place in the largest unit (KB, MB or GB) that does not
/// exceed the value; smaller values are shown as a whole number of bytes.
fn format_bytes(bytes: u64) -> String {
    // Largest unit first so the first matching threshold wins.
    const UNITS: [(u64, &str); 3] = [
        (1024 * 1024 * 1024, "GB"),
        (1024 * 1024, "MB"),
        (1024, "KB"),
    ];

    UNITS
        .iter()
        .find(|(threshold, _)| bytes >= *threshold)
        .map(|(threshold, suffix)| {
            format!("{:.1} {}", bytes as f64 / *threshold as f64, suffix)
        })
        .unwrap_or_else(|| format!("{} B", bytes))
}
30
31pub struct IndexProgressTracker {
38 pub total_files: usize,
41 pub total_bytes: u64,
43 pub chunk_size: usize,
45 pub estimated_chunks: usize,
47
48 calibration_start: Option<Instant>,
51 pub chunks_per_sec: Option<f64>,
53 pub embedder_model: Option<String>,
55 calibration_done: bool,
57
58 last_speed_update: Option<Instant>,
61 chunks_since_update: usize,
63 speed_ema_alpha: f64,
65
66 pub processed_chunks: usize,
69 pub processed_files: usize,
71 pub skipped_files: usize,
73 pub failed_files: usize,
75 pub indexed_files: usize,
77
78 progress_bar: Option<ProgressBar>,
80}
81
82impl IndexProgressTracker {
83 pub fn pre_scan(paths: &[PathBuf]) -> Self {
87 let total_bytes: u64 = paths
88 .iter()
89 .filter_map(|p| std::fs::metadata(p).ok())
90 .map(|m| m.len())
91 .sum();
92
93 let chunk_size = 500;
96 let estimated_chunks = (total_bytes as usize) / chunk_size;
97
98 Self {
99 total_files: paths.len(),
100 total_bytes,
101 chunk_size,
102 estimated_chunks,
103 calibration_start: None,
104 chunks_per_sec: None,
105 embedder_model: None,
106 calibration_done: false,
107 last_speed_update: None,
108 chunks_since_update: 0,
109 speed_ema_alpha: 0.3, processed_chunks: 0,
111 processed_files: 0,
112 skipped_files: 0,
113 failed_files: 0,
114 indexed_files: 0,
115 progress_bar: None,
116 }
117 }
118
119 pub fn display_pre_scan(&self) {
121 eprintln!();
122 eprintln!("Phase 1: Pre-scan");
123 eprintln!(" |-- Files: {}", self.total_files);
124 eprintln!(" |-- Total size: {}", format_bytes(self.total_bytes));
125 eprintln!(
126 " `-- Est. chunks: ~{} (@ {} chars/chunk)",
127 self.estimated_chunks, self.chunk_size
128 );
129 }
130
131 pub fn start_calibration(&mut self) {
135 self.calibration_start = Some(Instant::now());
136 eprintln!();
137 eprintln!("Phase 2: Calibration (first file)...");
138 }
139
140 pub fn finish_calibration(&mut self, chunks_processed: usize, model: &str) {
146 if let Some(start) = self.calibration_start {
147 let elapsed = start.elapsed();
148 if elapsed.as_secs_f64() > 0.0 && chunks_processed > 0 {
149 self.chunks_per_sec = Some(chunks_processed as f64 / elapsed.as_secs_f64());
150 }
151 self.embedder_model = Some(model.to_string());
152 self.calibration_done = true;
153 self.last_speed_update = Some(Instant::now());
155 self.chunks_since_update = 0;
156
157 eprintln!(
158 " `-- Speed: {:.1} chunks/sec ({}) [dynamic]",
159 self.chunks_per_sec.unwrap_or(0.0),
160 model
161 );
162 }
163 }
164
165 pub fn is_calibrated(&self) -> bool {
167 self.calibration_done
168 }
169
170 pub fn start_progress_bar(&mut self) {
174 let pb = ProgressBar::new(self.estimated_chunks as u64);
175 pb.set_style(
176 ProgressStyle::default_bar()
177 .template(
178 "{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} chunks | ETA: {eta} | {msg}",
179 )
180 .expect("Invalid progress bar template")
181 .progress_chars("#>-"),
182 );
183
184 eprintln!();
185 eprintln!("Phase 3: Indexing...");
186 self.progress_bar = Some(pb);
187 }
188
189 pub fn inc_chunks(&mut self, count: usize) {
194 self.processed_chunks += count;
195 self.chunks_since_update += count;
196
197 if let Some(last_update) = self.last_speed_update {
199 let elapsed = last_update.elapsed().as_secs_f64();
200 if elapsed >= 2.0 && self.chunks_since_update > 0 {
201 let current_speed = self.chunks_since_update as f64 / elapsed;
202
203 self.chunks_per_sec = Some(match self.chunks_per_sec {
205 Some(old_speed) => {
206 self.speed_ema_alpha * current_speed
207 + (1.0 - self.speed_ema_alpha) * old_speed
208 }
209 None => current_speed,
210 });
211
212 self.last_speed_update = Some(Instant::now());
214 self.chunks_since_update = 0;
215 }
216 }
217
218 if let Some(ref pb) = self.progress_bar {
219 pb.set_position(self.processed_chunks as u64);
220 }
221 }
222
223 pub fn file_indexed(&mut self, chunks: usize) {
228 self.indexed_files += 1;
229 self.processed_files += 1;
230 self.inc_chunks(chunks);
231 }
232
233 pub fn file_skipped(&mut self) {
235 self.skipped_files += 1;
236 self.processed_files += 1;
237 }
238
239 pub fn file_failed(&mut self) {
241 self.failed_files += 1;
242 self.processed_files += 1;
243 }
244
245 pub fn set_message(&mut self, msg: &str) {
247 if let Some(ref pb) = self.progress_bar {
248 pb.set_message(msg.to_string());
249 }
250 }
251
252 pub fn finish(&mut self) {
254 if let Some(ref pb) = self.progress_bar {
255 pb.finish_with_message("Complete");
256 }
257 }
258
259 pub fn display_summary(&self) {
261 eprintln!();
262 eprintln!("Indexing complete:");
263 eprintln!(" |-- Chunks indexed: {}", self.processed_chunks);
264 eprintln!(" |-- Files processed: {}", self.processed_files);
265 eprintln!(" | |-- Indexed: {}", self.indexed_files);
266 if self.skipped_files > 0 {
267 eprintln!(" | |-- Skipped (duplicate): {}", self.skipped_files);
268 }
269 if self.failed_files > 0 {
270 eprintln!(" | `-- Failed: {}", self.failed_files);
271 }
272 if let Some(speed) = self.chunks_per_sec {
273 eprintln!(" `-- Avg speed: {:.1} chunks/sec", speed);
274 }
275 }
276
277 pub fn adjust_estimate(&mut self, file_bytes: u64, actual_chunks: usize) {
282 if file_bytes > 0 && actual_chunks > 0 {
283 let bytes_per_chunk = file_bytes as f64 / actual_chunks as f64;
285 let remaining_bytes = self.total_bytes.saturating_sub(file_bytes);
287 let remaining_chunks = (remaining_bytes as f64 / bytes_per_chunk) as usize;
289 self.estimated_chunks = actual_chunks + remaining_chunks;
291
292 if let Some(ref pb) = self.progress_bar {
294 pb.set_length(self.estimated_chunks as u64);
295 }
296 }
297 }
298}
299
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::TempDir;

    /// Builds a tracker from an empty path list (all totals zero).
    fn empty_tracker() -> IndexProgressTracker {
        let no_paths: Vec<PathBuf> = Vec::new();
        IndexProgressTracker::pre_scan(&no_paths)
    }

    /// Creates `name` inside `dir` filled with `len` copies of `fill`.
    fn write_fixture(dir: &TempDir, name: &str, fill: u8, len: usize) -> PathBuf {
        let path = dir.path().join(name);
        let mut file = std::fs::File::create(&path).unwrap();
        file.write_all(&vec![fill; len]).unwrap();
        path
    }

    #[test]
    fn test_format_bytes() {
        let cases: [(u64, &str); 6] = [
            (0, "0 B"),
            (500, "500 B"),
            (1024, "1.0 KB"),
            (1536, "1.5 KB"),
            (1024 * 1024, "1.0 MB"),
            (1024 * 1024 * 1024, "1.0 GB"),
        ];

        for (input, expected) in cases.iter() {
            assert_eq!(format_bytes(*input), *expected);
        }
    }

    #[test]
    fn test_pre_scan_empty() {
        let tracker = IndexProgressTracker::pre_scan(&[]);

        assert_eq!(tracker.total_files, 0);
        assert_eq!(tracker.total_bytes, 0);
        assert_eq!(tracker.estimated_chunks, 0);
    }

    #[test]
    fn test_pre_scan_with_files() {
        let temp = TempDir::new().unwrap();
        let paths = vec![
            write_fixture(&temp, "file1.txt", b'a', 1000),
            write_fixture(&temp, "file2.txt", b'b', 500),
        ];

        let tracker = IndexProgressTracker::pre_scan(&paths);

        assert_eq!(tracker.total_files, 2);
        assert_eq!(tracker.total_bytes, 1500);
        // 1500 bytes at 500 per chunk.
        assert_eq!(tracker.estimated_chunks, 3);
    }

    #[test]
    fn test_file_tracking() {
        let mut tracker = empty_tracker();

        // Two indexed files (10 + 5 chunks), one skipped, one failed.
        tracker.file_indexed(10);
        tracker.file_indexed(5);
        tracker.file_skipped();
        tracker.file_failed();

        assert_eq!(tracker.processed_files, 4);
        assert_eq!(tracker.indexed_files, 2);
        assert_eq!(tracker.skipped_files, 1);
        assert_eq!(tracker.failed_files, 1);
        assert_eq!(tracker.processed_chunks, 15);
    }

    #[test]
    fn test_calibration_flow() {
        let mut tracker = empty_tracker();
        assert!(!tracker.is_calibrated());

        tracker.start_calibration();
        // Let a measurable amount of time pass so a speed can be computed.
        std::thread::sleep(std::time::Duration::from_millis(10));
        tracker.finish_calibration(100, "test-model");

        assert!(tracker.is_calibrated());
        assert!(tracker.chunks_per_sec.is_some());
        assert_eq!(tracker.embedder_model, Some("test-model".to_string()));
    }

    #[test]
    fn test_adjust_estimate() {
        let mut tracker = empty_tracker();
        tracker.total_bytes = 10000;
        tracker.estimated_chunks = 20;

        // 1000 bytes -> 5 chunks means 200 bytes/chunk; the remaining
        // 9000 bytes give 45 more chunks, for 50 in total.
        tracker.adjust_estimate(1000, 5);

        assert_eq!(tracker.estimated_chunks, 50);
    }
}