use indicatif::{ProgressBar, ProgressStyle};
use std::path::PathBuf;
use std::time::Instant;
/// Renders a byte count as a short human-readable string.
///
/// Uses binary units (1 KB = 1024 B). Values of at least 1 KB are shown with
/// one decimal place in the largest unit (up to GB) that does not exceed the
/// value; smaller values are shown as a whole number of bytes.
fn format_bytes(bytes: u64) -> String {
    // Ordered largest-first so the first threshold that fits is the unit used.
    const UNITS: [(u64, &str); 3] = [
        (1024 * 1024 * 1024, "GB"),
        (1024 * 1024, "MB"),
        (1024, "KB"),
    ];

    match UNITS.iter().find(|(threshold, _)| bytes >= *threshold) {
        Some((divisor, suffix)) => {
            format!("{:.1} {}", bytes as f64 / *divisor as f64, suffix)
        }
        None => format!("{} B", bytes),
    }
}
/// Tracks and reports the progress of an indexing run.
///
/// The tracker is created by `pre_scan`, optionally calibrated with
/// `start_calibration` / `finish_calibration`, and then updated through the
/// `file_*` and `inc_chunks` methods while an optional progress bar is shown.
pub struct IndexProgressTracker {
/// Number of paths handed to `pre_scan`.
pub total_files: usize,
/// Sum of the sizes (in bytes) of the paths whose metadata could be read.
pub total_bytes: u64,
/// Assumed chunk size behind the initial estimate (`pre_scan` sets it to 500;
/// reported to the user as "chars/chunk").
pub chunk_size: usize,
/// Current estimate of the total number of chunks; refined by `adjust_estimate`.
pub estimated_chunks: usize,
/// Moment `start_calibration` was called, if it has been.
calibration_start: Option<Instant>,
/// Throughput measured by `finish_calibration`; `None` until a measurement is recorded.
pub chunks_per_sec: Option<f64>,
/// Model name passed to `finish_calibration`, if calibration has finished.
pub embedder_model: Option<String>,
/// Set to `true` once `finish_calibration` has completed a started calibration.
calibration_done: bool,
/// Running total of chunks reported through `inc_chunks` / `file_indexed`.
pub processed_chunks: usize,
/// Number of files handled so far (indexed + skipped + failed).
pub processed_files: usize,
/// Number of files reported via `file_skipped` (shown as duplicates in the summary).
pub skipped_files: usize,
/// Number of files reported via `file_failed`.
pub failed_files: usize,
/// Number of files reported via `file_indexed`.
pub indexed_files: usize,
/// Progress bar created by `start_progress_bar`; `None` before that call.
progress_bar: Option<ProgressBar>,
}
impl IndexProgressTracker {
pub fn pre_scan(paths: &[PathBuf]) -> Self {
let total_bytes: u64 = paths
.iter()
.filter_map(|p| std::fs::metadata(p).ok())
.map(|m| m.len())
.sum();
let chunk_size = 500;
let estimated_chunks = (total_bytes as usize) / chunk_size;
Self {
total_files: paths.len(),
total_bytes,
chunk_size,
estimated_chunks,
calibration_start: None,
chunks_per_sec: None,
embedder_model: None,
calibration_done: false,
processed_chunks: 0,
processed_files: 0,
skipped_files: 0,
failed_files: 0,
indexed_files: 0,
progress_bar: None,
}
}
pub fn display_pre_scan(&self) {
eprintln!();
eprintln!("Phase 1: Pre-scan");
eprintln!(" |-- Files: {}", self.total_files);
eprintln!(" |-- Total size: {}", format_bytes(self.total_bytes));
eprintln!(
" `-- Est. chunks: ~{} (@ {} chars/chunk)",
self.estimated_chunks, self.chunk_size
);
}
pub fn start_calibration(&mut self) {
self.calibration_start = Some(Instant::now());
eprintln!();
eprintln!("Phase 2: Calibration (first file)...");
}
pub fn finish_calibration(&mut self, chunks_processed: usize, model: &str) {
if let Some(start) = self.calibration_start {
let elapsed = start.elapsed();
if elapsed.as_secs_f64() > 0.0 && chunks_processed > 0 {
self.chunks_per_sec = Some(chunks_processed as f64 / elapsed.as_secs_f64());
}
self.embedder_model = Some(model.to_string());
self.calibration_done = true;
eprintln!(
" `-- Speed: {:.1} chunks/sec ({})",
self.chunks_per_sec.unwrap_or(0.0),
model
);
}
}
pub fn is_calibrated(&self) -> bool {
self.calibration_done
}
pub fn start_progress_bar(&mut self) {
let pb = ProgressBar::new(self.estimated_chunks as u64);
pb.set_style(
ProgressStyle::default_bar()
.template(
"{spinner:.green} [{bar:40.cyan/blue}] {pos}/{len} chunks | ETA: {eta} | {msg}",
)
.expect("Invalid progress bar template")
.progress_chars("#>-"),
);
eprintln!();
eprintln!("Phase 3: Indexing...");
self.progress_bar = Some(pb);
}
pub fn inc_chunks(&mut self, count: usize) {
self.processed_chunks += count;
if let Some(ref pb) = self.progress_bar {
pb.set_position(self.processed_chunks as u64);
}
}
pub fn file_indexed(&mut self, chunks: usize) {
self.indexed_files += 1;
self.processed_files += 1;
self.inc_chunks(chunks);
}
pub fn file_skipped(&mut self) {
self.skipped_files += 1;
self.processed_files += 1;
}
pub fn file_failed(&mut self) {
self.failed_files += 1;
self.processed_files += 1;
}
pub fn set_message(&mut self, msg: &str) {
if let Some(ref pb) = self.progress_bar {
pb.set_message(msg.to_string());
}
}
pub fn finish(&mut self) {
if let Some(ref pb) = self.progress_bar {
pb.finish_with_message("Complete");
}
}
pub fn display_summary(&self) {
eprintln!();
eprintln!("Indexing complete:");
eprintln!(" |-- Chunks indexed: {}", self.processed_chunks);
eprintln!(" |-- Files processed: {}", self.processed_files);
eprintln!(" | |-- Indexed: {}", self.indexed_files);
if self.skipped_files > 0 {
eprintln!(" | |-- Skipped (duplicate): {}", self.skipped_files);
}
if self.failed_files > 0 {
eprintln!(" | `-- Failed: {}", self.failed_files);
}
if let Some(speed) = self.chunks_per_sec {
eprintln!(" `-- Avg speed: {:.1} chunks/sec", speed);
}
}
pub fn adjust_estimate(&mut self, file_bytes: u64, actual_chunks: usize) {
if file_bytes > 0 && actual_chunks > 0 {
let bytes_per_chunk = file_bytes as f64 / actual_chunks as f64;
let remaining_bytes = self.total_bytes.saturating_sub(file_bytes);
let remaining_chunks = (remaining_bytes as f64 / bytes_per_chunk) as usize;
self.estimated_chunks = actual_chunks + remaining_chunks;
if let Some(ref pb) = self.progress_bar {
pb.set_length(self.estimated_chunks as u64);
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a tracker over an empty path list, for tests that only exercise
    /// counters and state transitions.
    fn empty_tracker() -> IndexProgressTracker {
        let no_paths: Vec<PathBuf> = Vec::new();
        IndexProgressTracker::pre_scan(&no_paths)
    }

    /// Each unit boundary renders with the expected suffix and precision.
    #[test]
    fn test_format_bytes() {
        let cases: [(u64, &str); 6] = [
            (0, "0 B"),
            (500, "500 B"),
            (1024, "1.0 KB"),
            (1536, "1.5 KB"),
            (1024 * 1024, "1.0 MB"),
            (1024 * 1024 * 1024, "1.0 GB"),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(format_bytes(*input), *expected);
        }
    }

    /// Scanning no paths yields an all-zero tracker.
    #[test]
    fn test_pre_scan_empty() {
        let tracker = IndexProgressTracker::pre_scan(&[]);
        assert_eq!(tracker.total_files, 0);
        assert_eq!(tracker.total_bytes, 0);
        assert_eq!(tracker.estimated_chunks, 0);
    }

    /// Sizes of real files are summed and divided by the default chunk size.
    #[test]
    fn test_pre_scan_with_files() {
        let dir = TempDir::new().unwrap();
        let first = dir.path().join("file1.txt");
        let second = dir.path().join("file2.txt");
        std::fs::write(&first, vec![b'a'; 1000]).unwrap();
        std::fs::write(&second, vec![b'b'; 500]).unwrap();

        let tracker = IndexProgressTracker::pre_scan(&[first, second]);
        assert_eq!(tracker.total_files, 2);
        assert_eq!(tracker.total_bytes, 1500);
        assert_eq!(tracker.estimated_chunks, 3);
    }

    /// Indexed, skipped, and failed files all count as processed; only
    /// indexed files add chunks.
    #[test]
    fn test_file_tracking() {
        let mut tracker = empty_tracker();
        tracker.file_indexed(10);
        tracker.file_indexed(5);
        tracker.file_skipped();
        tracker.file_failed();

        assert_eq!(tracker.processed_files, 4);
        assert_eq!(tracker.indexed_files, 2);
        assert_eq!(tracker.skipped_files, 1);
        assert_eq!(tracker.failed_files, 1);
        assert_eq!(tracker.processed_chunks, 15);
    }

    /// A start/finish pair marks the tracker calibrated and records speed and model.
    #[test]
    fn test_calibration_flow() {
        let mut tracker = empty_tracker();
        assert!(!tracker.is_calibrated());

        tracker.start_calibration();
        // Ensure a non-zero elapsed time so a speed is recorded.
        std::thread::sleep(std::time::Duration::from_millis(10));
        tracker.finish_calibration(100, "test-model");

        assert!(tracker.is_calibrated());
        assert!(tracker.chunks_per_sec.is_some());
        assert_eq!(tracker.embedder_model, Some("test-model".to_string()));
    }

    /// 1000 bytes -> 5 chunks means 200 bytes/chunk, so the remaining
    /// 9000 bytes project to 45 chunks, for 50 in total.
    #[test]
    fn test_adjust_estimate() {
        let mut tracker = empty_tracker();
        tracker.total_bytes = 10000;
        tracker.estimated_chunks = 20;

        tracker.adjust_estimate(1000, 5);
        assert_eq!(tracker.estimated_chunks, 50);
    }
}