// pmat 3.11.0 - Docs.rs

#![cfg_attr(coverage_nightly, coverage(off))]
//! Unified WebAssembly Analyzer - Parse Once, Extract Twice
//!
//! This module eliminates the performance bottleneck of parsing WASM files twice
//! (once for AST extraction, once for complexity analysis) by combining both
//! operations into a single parse pass.
//!
//! # Performance Impact
//!
//! Before: 2x parse calls per file (AST + Complexity)
//! After: 1x parse call per file
//! Expected gain: 40-50% reduction in parse time

use anyhow::Result;
use std::path::{Path, PathBuf};
#[cfg(test)]
use std::sync::atomic::{AtomicUsize, Ordering};

use crate::services::complexity::{ComplexityMetrics, FileComplexityMetrics, FunctionComplexity};
use crate::services::context::AstItem;
use crate::services::languages::wasm::WasmModuleAnalyzer;

/// Unified analyzer that reads a WAT file once and derives both the AST items
/// and the complexity metrics from that single in-memory copy.
///
/// Derives `Debug` so that, like the other public types in this module, it can
/// be logged and used inside `#[derive(Debug)]` containers.
#[derive(Debug)]
pub struct UnifiedWasmAnalyzer {
    /// Path of the file this analyzer reads.
    file_path: PathBuf,

    /// Number of times `analyze` has been entered (test-only), used to verify
    /// that callers trigger a single parse.
    #[cfg(test)]
    parse_count: AtomicUsize,
}

/// Combined result of one [`UnifiedWasmAnalyzer::analyze`] call.
///
/// Both payloads are derived from the same in-memory copy of the file, so they
/// always describe the same file contents.
#[derive(Debug)]
pub struct UnifiedAnalysis {
    /// AST items (functions, exports), as returned by
    /// `WasmModuleAnalyzer::analyze_wat_text`.
    pub ast_items: Vec<AstItem>,

    /// File-level complexity metrics, including the per-function breakdown.
    pub file_metrics: FileComplexityMetrics,

    /// Parse timestamp (for cache validation).
    ///
    /// Captured with `Instant::now()` after both extractions have finished.
    /// An `Instant` is only comparable with other instants taken in the same
    /// process.
    pub parsed_at: std::time::Instant,
}

/// Error type for unified analysis, returned by [`UnifiedWasmAnalyzer::analyze`].
#[derive(Debug, thiserror::Error)]
pub enum AnalysisError {
    /// The file could not be read as text; wraps the underlying I/O error.
    /// `#[from]` also lets `?` convert a `std::io::Error` into this variant.
    #[error("Failed to read file: {0}")]
    Io(#[from] std::io::Error),

    /// The AST extraction step rejected the file contents; carries its message.
    #[error("Failed to parse WASM: {0}")]
    Parse(String),

    /// A failure in the analysis itself. Not constructed by the code visible in
    /// this file.
    #[error("Analysis error: {0}")]
    Analysis(String),
}

impl UnifiedWasmAnalyzer {
    /// Create new analyzer for a file
    pub fn new(file_path: PathBuf) -> Self {
        Self {
            file_path,
            #[cfg(test)]
            parse_count: AtomicUsize::new(0),
        }
    }

    /// Get the file path being analyzed
    pub fn file_path(&self) -> &Path {
        &self.file_path
    }

    /// Analyze file with single parse
    ///
    /// This is the core GREEN phase implementation: minimal but correct.
    pub async fn analyze(&self) -> Result<UnifiedAnalysis, AnalysisError> {
        // Track parse count for testing
        #[cfg(test)]
        {
            self.parse_count.fetch_add(1, Ordering::SeqCst);
        }

        // 1. Read file content (single I/O operation)
        let content = tokio::fs::read_to_string(&self.file_path)
            .await
            .map_err(AnalysisError::Io)?;

        // 2. Extract AST items using existing WASM analyzer
        let analyzer = WasmModuleAnalyzer::new(&self.file_path);
        let ast_items = analyzer
            .analyze_wat_text(&content)
            .map_err(AnalysisError::Parse)?;

        // 3. Extract complexity metrics (GREEN phase - simple pattern matching)
        let file_metrics = self.extract_complexity_metrics(&content);

        Ok(UnifiedAnalysis {
            ast_items,
            file_metrics,
            parsed_at: std::time::Instant::now(),
        })
    }

    /// Get parse count (test-only, for verifying single parse)
    #[cfg(test)]
    pub fn parse_count(&self) -> usize {
        self.parse_count.load(Ordering::SeqCst)
    }

    /// Extract complexity metrics from WASM content
    ///
    /// GREEN PHASE: Minimal implementation using pattern matching.
    /// This will be enhanced in REFACTOR phase with proper WASM instruction analysis.
    #[allow(clippy::cast_possible_truncation)]
    fn extract_complexity_metrics(&self, content: &str) -> FileComplexityMetrics {
        let mut functions = Vec::new();
        let lines = content.lines().count();

        // Simple function detection for WAT format
        let mut current_function: Option<String> = None;
        let mut function_complexity = 1u32;
        let mut line_start = 0;

        for (line_num, line) in content.lines().enumerate() {
            let trimmed = line.trim();

            // Function start
            if trimmed.contains("(func ") {
                if let Some(func_name) = self.extract_function_name(trimmed) {
                    current_function = Some(func_name);
                    function_complexity = 1;
                    line_start = line_num;
                }
            }

            // Count control flow keywords for complexity
            if current_function.is_some() {
                if trimmed.contains("if ") || trimmed.contains("if(") {
                    function_complexity += 1;
                }
                if trimmed.contains("loop ") || trimmed.contains("loop(") {
                    function_complexity += 1;
                }
                if trimmed.contains("br_if ") {
                    function_complexity += 1;
                }
                if trimmed.contains("br_table ") {
                    function_complexity += 2; // Table branches are more complex
                }
            }

            // Function end (closing parenthesis at same level)
            if trimmed == ")" && current_function.is_some() {
                let name = current_function
                    .take()
                    .expect("guarded by is_some() check above");
                functions.push(FunctionComplexity {
                    name,
                    line_start: line_start as u32,
                    line_end: line_num as u32,
                    metrics: ComplexityMetrics {
                        cyclomatic: function_complexity as u16,
                        cognitive: function_complexity as u16, // Simplified for GREEN phase
                        nesting_max: 0,
                        lines: (line_num - line_start) as u16,
                        halstead: None,
                    },
                });
            }
        }

        // Calculate file-level metrics
        let total_cyclomatic: u32 = functions.iter().map(|f| f.metrics.cyclomatic as u32).sum();

        let avg_cyclomatic = if functions.is_empty() {
            1
        } else {
            total_cyclomatic / functions.len() as u32
        };

        FileComplexityMetrics {
            path: self.file_path.display().to_string(),
            total_complexity: ComplexityMetrics {
                cyclomatic: avg_cyclomatic as u16,
                cognitive: avg_cyclomatic as u16,
                nesting_max: 0,
                lines: lines as u16,
                halstead: None,
            },
            functions,
            classes: Vec::new(), // WASM doesn't have classes
        }
    }

    /// Extract function name from WAT line
    fn extract_function_name(&self, line: &str) -> Option<String> {
        // Pattern: (func $name ...
        if let Some(start) = line.find("$") {
            let rest = line.get(start + 1..).unwrap_or_default();
            if let Some(end) = rest.find(|c: char| c.is_whitespace() || c == ')') {
                return Some(format!("${}", rest.get(..end).unwrap_or_default()));
            }
        }
        // Unnamed function
        Some("$func".to_string())
    }
}

#[cfg_attr(coverage_nightly, coverage(off))]
#[cfg(test)]
mod tests {
    use super::*;

    /// The analyzer hands back exactly the path it was constructed with.
    #[test]
    fn test_analyzer_creation() {
        let analyzer = UnifiedWasmAnalyzer::new(PathBuf::from("test.wat"));

        assert_eq!(analyzer.file_path(), Path::new("test.wat"));
    }

    /// One `analyze` call bumps the test-only parse counter from 0 to 1,
    /// whatever the outcome of the analysis itself.
    #[tokio::test]
    async fn test_parse_count_increments() {
        let wat_file = tempfile::NamedTempFile::with_suffix(".wat").unwrap();
        let wat_path = wat_file.path().to_path_buf();
        std::fs::write(&wat_path, "(module)").unwrap();

        let analyzer = UnifiedWasmAnalyzer::new(wat_path);
        assert_eq!(analyzer.parse_count(), 0);

        // Only the counter is under test here, so the result is discarded.
        let _ = analyzer.analyze().await;
        assert_eq!(analyzer.parse_count(), 1);
    }
}