// debtmap 0.17.0 - Docs.rs

//! Project analysis module for the analyze command.
//!
//! This module handles the core project analysis logic including file discovery,
//! parsing, and metrics extraction. Follows the Shell pattern for I/O operations.
//!
//! # Extraction Phase (Spec 213)
//!
//! The module now includes unified extraction as an early pipeline phase.
//! Each file is parsed exactly once, with all analysis data extracted upfront.
//! This prevents proc-macro2 SourceMap overflow on large codebases.

use crate::analysis::FileContext;
use crate::config::DebtmapConfig;
use crate::core::{AnalysisResults, DuplicationBlock, FileMetrics, FunctionMetrics, Language};
use crate::extraction::{ExtractedFileData, UnifiedFileExtractor};
use crate::formatting::FormattingConfig;
use crate::io;
use crate::progress::ProgressManager;
use crate::tui::app::StageStatus;
use crate::utils::{analysis_helpers, language_parser};
use crate::{analysis_utils, core::DebtItem};
use anyhow::{Context, Result};
use chrono::Utc;
use rayon::prelude::*;
use std::collections::HashMap;
use std::path::{Path, PathBuf};

use crate::time_span;

use super::config::AnalyzeConfig;

/// Output from project analysis including extracted data for downstream phases.
///
/// Returned by `run_analysis_with_extraction` and
/// `analyze_project_with_extraction`; the plain `run_analysis` and
/// `analyze_project` wrappers keep only `results`.
pub struct ProjectAnalysisOutput {
    /// The analysis results (metrics, debt, etc.)
    pub results: AnalysisResults,
    /// Pre-extracted Rust file data for reuse (avoids re-parsing), keyed by
    /// file path. `None` when the analyzed file set contained no `.rs` files.
    pub extracted_data: Option<HashMap<PathBuf, ExtractedFileData>>,
}

/// Run the full project analysis and return only the analysis results (I/O).
///
/// Thin compatibility wrapper around `run_analysis_with_extraction`: the
/// extracted per-file data is dropped and only `results` is handed back.
/// Callers that want to reuse the extraction output should call
/// `run_analysis_with_extraction` directly.
///
/// # Errors
///
/// Propagates any error returned by `run_analysis_with_extraction`.
#[allow(dead_code)] // Kept for backward compatibility
pub fn run_analysis(config: &AnalyzeConfig) -> Result<AnalysisResults> {
    let output = run_analysis_with_extraction(config)?;
    Ok(output.results)
}

/// Run the project analysis described by `config`, keeping extracted data (I/O).
///
/// Parses the configured language list, then forwards the path, thresholds,
/// parallel flag and formatting configuration from `config` to
/// `analyze_project_with_extraction`. The returned output carries both the
/// analysis results and the extracted Rust file data for later phases.
///
/// # Errors
///
/// Propagates any error returned by `analyze_project_with_extraction`.
pub fn run_analysis_with_extraction(config: &AnalyzeConfig) -> Result<ProjectAnalysisOutput> {
    let requested_languages = language_parser::parse_languages(config.languages.clone());
    let project_root = config.path.clone();

    analyze_project_with_extraction(
        project_root,
        requested_languages,
        config.threshold_complexity,
        config.threshold_duplication,
        config.parallel,
        config._formatting_config,
    )
}

/// Analyze the project at `path` and return only the analysis results (I/O).
///
/// Takes the same arguments as `analyze_project_with_extraction` and delegates
/// to it, discarding the extracted file data from the output.
///
/// # Errors
///
/// Propagates any error returned by `analyze_project_with_extraction`.
pub fn analyze_project(
    path: PathBuf,
    languages: Vec<Language>,
    complexity_threshold: u32,
    duplication_threshold: usize,
    parallel_enabled: bool,
    formatting_config: FormattingConfig,
) -> Result<AnalysisResults> {
    let output = analyze_project_with_extraction(
        path,
        languages,
        complexity_threshold,
        duplication_threshold,
        parallel_enabled,
        formatting_config,
    )?;

    Ok(output.results)
}

/// Analyze project and return results with extracted data (I/O).
///
/// This is the main analysis entry point that extracts Rust files once
/// and reuses the data for metrics and downstream analysis phases.
///
/// The steps run in a fixed order because each one also advances the
/// progress/TUI state for stage 0: file discovery, parsing/extraction,
/// analysis-data collection, duplication detection, then result assembly.
///
/// # Arguments
///
/// * `path` - project root to scan; also stored as `project_path` in the results.
/// * `languages` - languages used to select files during discovery.
/// * `complexity_threshold` - forwarded to the complexity report builder.
/// * `duplication_threshold` - forwarded to duplication detection.
/// * `parallel_enabled` - when `true`, the `DEBTMAP_PARALLEL` env var is set.
/// * `formatting_config` - forwarded to the parsing phase.
///
/// # Errors
///
/// Returns an error when file discovery fails or when the parsing phase
/// reports an error.
pub fn analyze_project_with_extraction(
    path: PathBuf,
    languages: Vec<Language>,
    complexity_threshold: u32,
    duplication_threshold: usize,
    parallel_enabled: bool,
    formatting_config: FormattingConfig,
) -> Result<ProjectAnalysisOutput> {
    time_span!("analyze_project");

    // Environment and progress setup happen before any file work starts.
    setup_parallel_env(parallel_enabled);
    let config = crate::config::get_config();
    init_global_progress();

    start_files_phase();

    let files = discover_files(&path, &languages, config)?;

    // Spec 214: Extract Rust files first, convert to metrics via adapter
    let (file_metrics, extracted_data) =
        parse_and_extract_metrics_hybrid(&files, parallel_enabled, formatting_config)?;

    // Flatten per-file metrics into project-wide collections.
    let (all_functions, all_debt_items, file_contexts) = extract_analysis_data(&file_metrics);

    // Duplication detection works on the discovered paths, not on the metrics.
    let duplications = detect_duplications(&files, duplication_threshold);
    complete_files_phase(files.len());

    let results = build_analysis_results(
        path,
        all_functions,
        all_debt_items,
        duplications,
        file_contexts,
        complexity_threshold,
        &file_metrics,
    );

    // Hand the extraction output back so later phases can reuse it.
    Ok(ProjectAnalysisOutput {
        results,
        extracted_data,
    })
}

/// Publish the parallel-processing choice through the process environment.
///
/// Sets `DEBTMAP_PARALLEL=true` when `parallel_enabled` is `true`. When it is
/// `false` the environment is left untouched (an existing value is not cleared).
fn setup_parallel_env(parallel_enabled: bool) {
    if !parallel_enabled {
        return;
    }

    std::env::set_var("DEBTMAP_PARALLEL", "true");
}

/// Set up the global analysis progress tracker unless quiet mode is requested.
///
/// Quiet mode is detected by `DEBTMAP_QUIET` being readable from the
/// environment; in that case no tracker is initialized.
fn init_global_progress() {
    if std::env::var("DEBTMAP_QUIET").is_err() {
        io::progress::AnalysisProgress::init_global();
    }
}

/// Mark the beginning of the files phase in both progress reporters.
///
/// Starts phase 0 on the global analysis progress, then, if a global
/// `ProgressManager` exists, starts TUI stage 0 and activates its first subtask.
fn start_files_phase() {
    io::progress::AnalysisProgress::with_global(|progress| progress.start_phase(0));

    let Some(tui) = ProgressManager::global() else {
        return;
    };
    tui.tui_start_stage(0);
    tui.tui_update_subtask(0, 0, StageStatus::Active, None);
}

/// Find the project files to analyze under `path` for the given `languages`.
///
/// After a successful walk, and if a global `ProgressManager` exists, subtask 0
/// of stage 0 is marked completed and subtask 1 is activated, with a 150 ms
/// pause between the two updates.
///
/// # Errors
///
/// Returns the walker error wrapped with the context
/// "Failed to find project files".
fn discover_files(
    path: &Path,
    languages: &[Language],
    config: &DebtmapConfig,
) -> Result<Vec<PathBuf>> {
    time_span!("file_discovery", parent: "analyze_project");

    let discovered = io::walker::find_project_files_with_config(path, languages.to_vec(), config)
        .context("Failed to find project files")?;

    let Some(tui) = ProgressManager::global() else {
        return Ok(discovered);
    };
    tui.tui_update_subtask(0, 0, StageStatus::Completed, None);
    // Pause between the two status updates (presumably so the transition is
    // visible in the TUI - TODO confirm).
    std::thread::sleep(std::time::Duration::from_millis(150));
    tui.tui_update_subtask(0, 1, StageStatus::Active, None);

    Ok(discovered)
}

/// Legacy parsing path: collect metrics for every file through the
/// traditional analyzer, with progress tracking.
///
/// Reports the file count, applies the project-size configuration, collects
/// the metrics and finally marks the parsing step as complete.
///
/// # Errors
///
/// Propagates any error from `configure_project_size`.
#[allow(dead_code)] // Legacy path, superseded by `parse_and_extract_metrics_hybrid`
fn parse_and_extract_metrics(
    files: &[PathBuf],
    parallel_enabled: bool,
    formatting_config: FormattingConfig,
) -> Result<Vec<FileMetrics>> {
    let total_files = files.len();

    update_file_count(total_files);
    configure_project_size(files, parallel_enabled, formatting_config)?;

    let metrics = analysis_utils::collect_file_metrics(files);
    complete_parsing(total_files);

    Ok(metrics)
}

/// Result type for hybrid metrics extraction with extracted data.
///
/// The first element holds the per-file metrics; the second holds the raw
/// extraction output for Rust files, or `None` when there were no Rust files.
type HybridMetricsResult = (
    Vec<FileMetrics>,
    Option<HashMap<PathBuf, ExtractedFileData>>,
);

/// Produce metrics for all files, routing Rust files through unified
/// extraction and everything else through the traditional analyzer (Spec 214).
///
/// Steps:
/// 1. Report the file count and apply project-size configuration.
/// 2. Partition `files` into `.rs` files and the rest.
/// 3. Extract the Rust files via `extract_all_files` and convert the result
///    to `FileMetrics` through the metrics adapter.
/// 4. Collect metrics for the remaining files with
///    `analysis_utils::collect_file_metrics`.
/// 5. Merge both sets, sort them by path and mark parsing as complete.
///
/// The second tuple element is the raw extraction output for downstream reuse,
/// or `None` when there were no Rust files.
///
/// # Errors
///
/// Propagates any error from `configure_project_size`.
fn parse_and_extract_metrics_hybrid(
    files: &[PathBuf],
    parallel_enabled: bool,
    formatting_config: FormattingConfig,
) -> Result<HybridMetricsResult> {
    time_span!("parsing", parent: "analyze_project");

    let total_files = files.len();
    update_file_count(total_files);
    configure_project_size(files, parallel_enabled, formatting_config)?;

    // Rust sources go through the extractor; everything else is parsed the old way.
    let is_rust_source = |candidate: &PathBuf| candidate.extension().is_some_and(|ext| ext == "rs");
    let (rust_sources, other_sources): (Vec<PathBuf>, Vec<PathBuf>) =
        files.iter().cloned().partition(is_rust_source);

    // Rust: single extraction pass, then adapt the extracted data into metrics.
    let (mut combined, extracted_data) = match rust_sources.as_slice() {
        [] => (Vec::new(), None),
        sources => {
            let extracted = extract_all_files(sources);
            let metrics =
                crate::extraction::adapters::metrics::all_file_metrics_from_extracted(&extracted);
            (metrics, Some(extracted))
        }
    };

    // Non-Rust: traditional per-file analysis, appended after the Rust metrics.
    if !other_sources.is_empty() {
        combined.extend(analysis_utils::collect_file_metrics(&other_sources));
    }

    // The Rust metrics originate from a HashMap, whose iteration order is not
    // stable. Sorting by path keeps scores and location counts deterministic
    // in the prioritization phase (spec 214 fix).
    combined.sort_by(|left, right| left.path.cmp(&right.path));

    complete_parsing(total_files);

    Ok((combined, extracted_data))
}

/// Report the number of discovered files to the global analysis progress.
///
/// The count is sent as a `PhaseProgress::Count` update.
fn update_file_count(count: usize) {
    io::progress::AnalysisProgress::with_global(|progress| {
        progress.update_progress(io::progress::PhaseProgress::Count(count));
    });
}

/// Apply project-size dependent logging and environment tweaks.
///
/// Unless quiet mode is active (`DEBTMAP_QUIET` readable from the
/// environment), logs the size class of the project and applies the
/// large-project environment configuration. In quiet mode both steps are
/// skipped. `_formatting_config` is accepted but not used.
///
/// # Errors
///
/// Currently always returns `Ok(())`.
fn configure_project_size(
    files: &[PathBuf],
    parallel_enabled: bool,
    _formatting_config: FormattingConfig,
) -> Result<()> {
    if std::env::var("DEBTMAP_QUIET").is_ok() {
        return Ok(());
    }

    let file_count = files.len();
    log_project_size_info(file_count, parallel_enabled);
    configure_large_project_env(file_count);

    Ok(())
}

/// Emit one `info` line classifying the project by its number of files.
///
/// Size classes: up to 100 files is "small", up to 500 "medium", up to 1000
/// "large", up to 2000 "very large"; anything bigger is delegated to
/// `log_massive_project`. Only the medium class additionally reports whether
/// parallel processing is enabled.
fn log_project_size_info(file_count: usize, parallel_enabled: bool) {
    if file_count <= 100 {
        log::info!("Analyzing {} files (small project)", file_count);
    } else if file_count <= 500 {
        log::info!("Analyzing {} files (medium project)", file_count);
        log_parallel_status(parallel_enabled);
    } else if file_count <= 1000 {
        log::info!("Analyzing {} files (large project)", file_count);
    } else if file_count <= 2000 {
        log::info!("Analyzing {} files (very large project)", file_count);
    } else {
        log_massive_project(file_count);
    }
}

/// Report whether parallel processing is in use.
///
/// Logs at `warn` level when running sequentially and at `info` level when
/// parallel processing is enabled.
fn log_parallel_status(parallel_enabled: bool) {
    if !parallel_enabled {
        log::warn!("Using sequential processing (use default for better performance)");
        return;
    }

    log::info!("Parallel processing enabled for better performance");
}

/// Log massive project info.
///
/// Used by `log_project_size_info` as the fallback for file counts above 2000.
fn log_massive_project(file_count: usize) {
    log::info!("Analyzing {} files (massive project)", file_count);
}

/// Adjust the process environment for large projects.
///
/// When more than 500 files are analyzed, `RUST_BACKTRACE` is set to `"0"`.
/// For 500 files or fewer the environment is left as it is.
fn configure_large_project_env(file_count: usize) {
    const LARGE_PROJECT_FILE_COUNT: usize = 500;

    if file_count <= LARGE_PROJECT_FILE_COUNT {
        return;
    }

    std::env::set_var("RUST_BACKTRACE", "0");
}

/// Mark the parsing step as finished in both progress reporters.
///
/// Reports `file_count` of `file_count` files processed on the global
/// analysis progress and completes the current phase. If a global
/// `ProgressManager` exists, subtask 1 of stage 0 is marked completed and
/// subtask 2 is activated, with a 150 ms pause between the two updates.
fn complete_parsing(file_count: usize) {
    io::progress::AnalysisProgress::with_global(|progress| {
        progress.update_progress(io::progress::PhaseProgress::Progress {
            current: file_count,
            total: file_count,
        });
        progress.complete_phase();
    });

    let Some(tui) = ProgressManager::global() else {
        return;
    };
    tui.tui_update_subtask(0, 1, StageStatus::Completed, None);
    // Pause between the two status updates (presumably so the transition is
    // visible in the TUI - TODO confirm).
    std::thread::sleep(std::time::Duration::from_millis(150));
    tui.tui_update_subtask(0, 2, StageStatus::Active, None);
}

/// Flatten per-file metrics into project-wide collections.
///
/// Returns, in order: all function metrics, all debt items, and the file
/// contexts keyed by path. If a global `ProgressManager` exists, the function
/// and debt-item counts are pushed to the TUI, subtask 2 of stage 0 is marked
/// completed and, after a 150 ms pause, subtask 3 is activated with a `(0, 0)`
/// progress pair.
fn extract_analysis_data(
    file_metrics: &[FileMetrics],
) -> (
    Vec<FunctionMetrics>,
    Vec<DebtItem>,
    HashMap<PathBuf, FileContext>,
) {
    let functions = analysis_utils::extract_all_functions(file_metrics);
    let debt_items = analysis_utils::extract_all_debt_items(file_metrics);
    let contexts = analysis_utils::extract_file_contexts(file_metrics);

    if let Some(tui) = ProgressManager::global() {
        let (function_count, debt_count) = (functions.len(), debt_items.len());
        tui.tui_update_counts(function_count, debt_count);
        tui.tui_update_subtask(0, 2, StageStatus::Completed, None);
        std::thread::sleep(std::time::Duration::from_millis(150));
        tui.tui_update_subtask(0, 3, StageStatus::Active, Some((0, 0)));
    }

    (functions, debt_items, contexts)
}

/// Run duplication detection over `files` and report progress to the TUI.
///
/// `threshold` is forwarded unchanged to the detection helper. While detection
/// runs, each progress callback updates subtask 3 of stage 0 with the
/// `(current, total)` pair; once it finishes, the subtask is marked completed
/// with the full file count.
fn detect_duplications(files: &[PathBuf], threshold: usize) -> Vec<DuplicationBlock> {
    time_span!("duplication_detection", parent: "analyze_project");

    let total_files = files.len();
    let found =
        analysis_helpers::detect_duplications_with_progress(files, threshold, |current, total| {
            let Some(tui) = ProgressManager::global() else {
                return;
            };
            tui.tui_update_subtask(0, 3, StageStatus::Active, Some((current, total)));
        });

    if let Some(tui) = ProgressManager::global() {
        let all_done = Some((total_files, total_files));
        tui.tui_update_subtask(0, 3, StageStatus::Completed, all_done);
    }

    found
}

/// Close TUI stage 0 once all file-level work is done.
///
/// Does nothing when no global `ProgressManager` exists. Otherwise the stage
/// is completed with a "`<n>` files parsed" summary and overall progress is
/// set to one sixth.
fn complete_files_phase(file_count: usize) {
    let Some(tui) = ProgressManager::global() else {
        return;
    };

    let summary = format!("{} files parsed", file_count);
    tui.tui_complete_stage(0, summary);
    // Six stages in total (0-5), so finishing stage 0 is 1/6 of the way.
    tui.tui_set_progress(1.0 / 6.0);
}

/// Assemble the final `AnalysisResults` from the collected pieces.
///
/// Builds the complexity report (using `complexity_threshold`), the technical
/// debt report and the dependency report, then stamps the result with the
/// current UTC time. `duplications` is used twice: a copy goes into the
/// technical debt report and the original is stored on the results.
fn build_analysis_results(
    path: PathBuf,
    all_functions: Vec<FunctionMetrics>,
    all_debt_items: Vec<DebtItem>,
    duplications: Vec<DuplicationBlock>,
    file_contexts: HashMap<PathBuf, FileContext>,
    complexity_threshold: u32,
    file_metrics: &[FileMetrics],
) -> AnalysisResults {
    // Field initializers run in written order: the three reports are built
    // first, the timestamp is taken afterwards, and `duplications` is cloned
    // for the debt report before it is moved into its own field.
    AnalysisResults {
        complexity: analysis_helpers::build_complexity_report(
            &all_functions,
            complexity_threshold,
        ),
        technical_debt: analysis_helpers::build_technical_debt_report(
            all_debt_items,
            duplications.clone(),
        ),
        dependencies: analysis_helpers::create_dependency_report(file_metrics),
        timestamp: Utc::now(),
        project_path: path,
        duplications,
        file_contexts,
    }
}

// ============================================================================
// Unified Extraction Phase (Spec 213)
// ============================================================================

/// Batch size for extraction to prevent SourceMap overflow.
/// 200 files * ~50KB avg = ~10MB per batch, well under the 4GB limit.
///
/// Passed to `UnifiedFileExtractor::extract_batch` by `extract_all_files`.
/// NOTE(review): the ~50KB average is an estimate, not something this module measures.
const EXTRACTION_BATCH_SIZE: usize = 200;

/// Return clones of the paths whose extension is exactly `rs`.
///
/// The comparison is case-sensitive, paths without an extension are dropped,
/// and the input order is preserved.
fn filter_rust_files(files: &[PathBuf]) -> Vec<PathBuf> {
    let rust_extension = std::ffi::OsStr::new("rs");

    files
        .iter()
        .filter(|candidate| candidate.extension() == Some(rust_extension))
        .cloned()
        .collect()
}

/// Read file contents in parallel (I/O operation).
fn read_file_contents(paths: &[PathBuf]) -> Vec<(PathBuf, String)> {
    paths
        .par_iter()
        .filter_map(|path| {
            std::fs::read_to_string(path)
                .ok()
                .map(|content| (path.clone(), content))
        })
        .collect()
}

/// Keep the successful extraction results, keyed by path.
///
/// Every failed result is dropped after logging a warning that names the path
/// and the error. If the same path occurs more than once, the last successful
/// entry wins.
fn collect_extraction_results(
    results: Vec<(PathBuf, anyhow::Result<ExtractedFileData>)>,
) -> HashMap<PathBuf, ExtractedFileData> {
    let mut extracted = HashMap::new();

    for (path, outcome) in results {
        match outcome {
            Ok(data) => {
                extracted.insert(path, data);
            }
            Err(e) => log::warn!("Failed to extract {}: {}", path.display(), e),
        }
    }

    extracted
}

/// Run unified extraction over the Rust files in `files` (I/O).
///
/// Non-`.rs` paths are ignored. The remaining files are read from disk and
/// handed to `UnifiedFileExtractor::extract_batch` together with
/// `EXTRACTION_BATCH_SIZE`; batching (and any SourceMap handling between
/// batches) is the extractor's responsibility. Only successful extractions
/// appear in the returned map.
///
/// Returns an empty map when `files` contains no Rust files.
///
/// # Spec 213
///
/// This is the "Unified Extraction" phase that follows file discovery: the
/// extracted data is meant to be produced once here and reused by downstream
/// analysis phases.
pub fn extract_all_files(files: &[PathBuf]) -> HashMap<PathBuf, ExtractedFileData> {
    let rust_sources = filter_rust_files(files);

    if rust_sources.is_empty() {
        HashMap::new()
    } else {
        let sources_with_text = read_file_contents(&rust_sources);
        let outcomes = UnifiedFileExtractor::extract_batch(&sources_with_text, EXTRACTION_BATCH_SIZE);
        collect_extraction_results(outcomes)
    }
}

// Note: Metrics conversion from extracted data moved to extraction adapters (spec 214).
// Use crate::extraction::adapters::metrics for converting ExtractedFileData to FunctionMetrics.

#[cfg(test)]
mod tests {
    use super::*;

    /// Turn string literals into owned paths for test fixtures.
    fn paths(names: &[&str]) -> Vec<PathBuf> {
        names.iter().copied().map(PathBuf::from).collect()
    }

    /// Build a failed extraction entry for `name` carrying `message`.
    fn failure(name: &str, message: &'static str) -> (PathBuf, anyhow::Result<ExtractedFileData>) {
        (PathBuf::from(name), Err(anyhow::anyhow!(message)))
    }

    // --- filter_rust_files -------------------------------------------------

    #[test]
    fn filter_rust_files_includes_only_rs_extension() {
        let input = paths(&["src/main.rs", "src/lib.py", "README.md", "src/utils.rs"]);

        let rust_only = filter_rust_files(&input);

        assert_eq!(rust_only.len(), 2);
        for expected in paths(&["src/main.rs", "src/utils.rs"]) {
            assert!(rust_only.contains(&expected));
        }
    }

    #[test]
    fn filter_rust_files_handles_empty_input() {
        let rust_only = filter_rust_files(&paths(&[]));

        assert!(rust_only.is_empty());
    }

    #[test]
    fn filter_rust_files_handles_no_rust_files() {
        let input = paths(&["src/main.py", "README.md", "Cargo.toml"]);

        assert!(filter_rust_files(&input).is_empty());
    }

    #[test]
    fn filter_rust_files_handles_files_without_extension() {
        let input = paths(&["Makefile", "src/main.rs", ".gitignore"]);

        let rust_only = filter_rust_files(&input);

        assert_eq!(rust_only.len(), 1);
        assert_eq!(rust_only[0], PathBuf::from("src/main.rs"));
    }

    // --- collect_extraction_results ----------------------------------------

    #[test]
    fn collect_extraction_results_filters_errors() {
        let good = (
            PathBuf::from("good.rs"),
            Ok(ExtractedFileData::empty(PathBuf::from("test.rs"))),
        );
        let input = vec![good, failure("bad.rs", "parse error")];

        let collected = collect_extraction_results(input);

        assert_eq!(collected.len(), 1);
        assert!(collected.contains_key(&PathBuf::from("good.rs")));
        assert!(!collected.contains_key(&PathBuf::from("bad.rs")));
    }

    #[test]
    fn collect_extraction_results_handles_all_errors() {
        let input = vec![
            failure("bad1.rs", "parse error"),
            failure("bad2.rs", "syntax error"),
        ];

        assert!(collect_extraction_results(input).is_empty());
    }

    #[test]
    fn collect_extraction_results_handles_empty_input() {
        let input: Vec<(PathBuf, anyhow::Result<ExtractedFileData>)> = Vec::new();

        assert!(collect_extraction_results(input).is_empty());
    }
}