//! Benchmark binary for `uncomment` v2.5.0, a CLI tool that removes comments
//! from code using tree-sitter for accurate parsing.
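//!
//! A sketch of a typical invocation (the target path and numbers are
//! illustrative, not defaults):
//!
//! ```sh
//! benchmark --target ./some/codebase --iterations 3 --sample-size 500
//! ```
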
use clap::Parser;
use std::path::PathBuf;
use std::time::Instant;

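// Reuse the shared benchmarking harness without a separate library crate:
// `include!` splices ../../bench/mod.rs in as a local `bench` module.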
mod bench {
    include!("../../bench/mod.rs");
}

#[derive(Parser)]
#[command(
    name = "benchmark",
    about = "Benchmark tool for testing uncomment performance on large codebases"
)]
struct BenchmarkCli {
    /// Path to the uncomment binary to test
    #[arg(short, long, default_value = "./target/release/uncomment")]
    uncomment_binary: PathBuf,

    /// Target directory to process
    #[arg(short, long)]
    target: PathBuf,

    /// Sample size: limit the number of files processed (for quick tests)
    #[arg(short, long)]
    sample_size: Option<usize>,

    /// Number of iterations to run (results are averaged across runs)
    #[arg(short, long, default_value = "1")]
    iterations: usize,

    /// Test only specific language files
    #[arg(short, long)]
    language: Option<String>,

    /// Enable memory profiling (requires additional tools)
    #[arg(short, long)]
    memory_profile: bool,

    /// Number of threads for parallel processing (0 = auto)
    #[arg(short = 'j', long, default_value = "1")]
    threads: usize,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let cli = BenchmarkCli::parse();

    // Validate inputs
    if !cli.uncomment_binary.exists() {
        eprintln!(
            "❌ Uncomment binary not found: {}",
            cli.uncomment_binary.display()
        );
        std::process::exit(1);
    }

    if !cli.target.exists() {
        eprintln!("❌ Target directory not found: {}", cli.target.display());
        std::process::exit(1);
    }

    if cli.iterations == 0 {
        eprintln!("❌ --iterations must be at least 1");
        std::process::exit(1);
    }

    println!("🎯 UNCOMMENT PERFORMANCE BENCHMARK");
    println!("==================================");
    println!("πŸ”§ Binary: {}", cli.uncomment_binary.display());
    println!("πŸ“ Target: {}", cli.target.display());
    println!("πŸ”„ Iterations: {}", cli.iterations);

    if let Some(lang) = &cli.language {
        println!("πŸ—£οΈ  Language filter: {lang}");
    }

    if cli.memory_profile {
        println!("πŸ’Ύ Memory profiling: enabled");
    }

    let mut results = Vec::new();
    let overall_start = Instant::now();

    // Run benchmark iterations
    for iteration in 1..=cli.iterations {
        println!("\nπŸƒ Running iteration {}/{}...", iteration, cli.iterations);

        let result = bench::run_benchmark(&cli.uncomment_binary, &cli.target, cli.sample_size)?;

        result.print_summary();
        results.push(result);

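        // Pause briefly between runs so OS caches and I/O can settle,
        // keeping iterations comparable.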
        if cli.iterations > 1 && iteration < cli.iterations {
            println!("\n⏸️  Waiting 2s before next iteration...");
            std::thread::sleep(std::time::Duration::from_secs(2));
        }
    }

    let total_duration = overall_start.elapsed();

    // Print aggregate results if multiple iterations
    if cli.iterations > 1 {
        println!("\nπŸ“Š AGGREGATE RESULTS ({} iterations)", cli.iterations);
        println!("=====================================");

        let avg_duration = results
            .iter()
            .map(|r| r.duration.as_secs_f64())
            .sum::<f64>()
            / results.len() as f64;

        let avg_files_per_sec =
            results.iter().map(|r| r.files_per_second).sum::<f64>() / results.len() as f64;

        let avg_comments_per_sec =
            results.iter().map(|r| r.comments_per_second).sum::<f64>() / results.len() as f64;

        let total_files = results[0].total_files; // Same file set across iterations
        let avg_comments_removed = results
            .iter()
            .map(|r| r.total_comments_removed)
            .sum::<usize>()
            / results.len();

        println!("⏱️  Average duration: {avg_duration:.2}s");
        println!("πŸš€ Average files/sec: {avg_files_per_sec:.1}");
        println!("πŸ’¬ Average comments/sec: {avg_comments_per_sec:.1}");
        println!("πŸ“‚ Total files: {total_files}");
        println!("πŸ—‘οΈ  Avg comments removed: {total_comments}");

        // Consistency check: spread = (max - min) as a percentage of the
        // mean duration (a simple range measure, not statistical variance).
        let durations: Vec<f64> = results.iter().map(|r| r.duration.as_secs_f64()).collect();
        let min_duration = durations.iter().fold(f64::INFINITY, |a, &b| a.min(b));
        let max_duration = durations.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
        let spread_pct = (max_duration - min_duration) / avg_duration * 100.0;

        println!("📈 Performance spread: {spread_pct:.1}%");
        println!(
            "⏰ Total benchmark time: {:.2}s",
            total_duration.as_secs_f64()
        );
    }

    // Performance analysis and recommendations
    println!("\nπŸ” PERFORMANCE ANALYSIS");
    println!("=======================");

    let last_result = results.last().expect("at least one iteration ran");

    if last_result.files_per_second < 10.0 {
        println!(
            "⚠️  Performance concern: Low throughput ({:.1} files/sec)",
            last_result.files_per_second
        );
        println!("πŸ’‘ Consider optimizations:");
        println!("   β€’ Parallel processing");
        println!("   β€’ I/O buffering improvements");
        println!("   β€’ Parser initialization caching");
    } else if last_result.files_per_second < 100.0 {
        println!(
            "✅ Good performance: {:.1} files/sec",
            last_result.files_per_second
        );
        println!("💡 Potential improvements:");
        println!("   • Multi-threading for large directories");
        println!("   • Memory-mapped file reading");
    } else {
        println!(
            "🚀 Excellent performance: {:.1} files/sec",
            last_result.files_per_second
        );
        println!("🎉 Performance is already optimized!");
    }

    // Estimate processing time for the full Armis codebase
    if cli.sample_size.is_some() {
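        // Linear extrapolation: file count / measured files-per-second gives
        // seconds; this assumes throughput scales linearly with codebase size.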
        let estimated_full_time = 850_000.0 / last_result.files_per_second;
        println!("\nπŸ“Š FULL CODEBASE ESTIMATE");
        println!("=========================");
        println!(
            "🏢 For ~850k files (Armis-scale): ~{:.1} minutes",
            estimated_full_time / 60.0
        );

        if estimated_full_time > 300.0 {
            // > 5 minutes
            println!("⚠️  Consider optimization for large-scale usage");
        }
    }

    println!("\nβœ… Benchmark completed successfully!");

    Ok(())
}