go_brrr/metrics/complexity/
cyclomatic.rs

1//! Cyclomatic complexity calculation using control flow graph analysis.
2//!
3//! Cyclomatic complexity measures the structural complexity of code by counting
4//! the number of linearly independent paths through a function. Higher values
5//! indicate more complex code that is harder to test and maintain.
6//!
7//! # Calculation Methods
8//!
9//! This module supports two calculation methods:
10//!
11//! 1. **Decision Point Method** (recommended):
12//!    ```text
13//!    M = decision_points + 1
14//!    ```
15//!    Counts decision points (if, while, for, catch, etc.) directly from CFG.
16//!
17//! 2. **Graph Formula** (classic):
18//!    ```text
19//!    M = E - N + 2P
20//!    ```
21//!    Where E = edges, N = nodes, P = connected components (usually 1).
22//!
23//! # Decision Points by Language
24//!
25//! | Language   | Decision Points                                              |
26//! |------------|--------------------------------------------------------------|
27//! | Python     | if, elif, while, for, except, and, or, comprehension if     |
28//! | TypeScript | if, else if, for, while, catch, &&, \|\|, ?:, ?.            |
29//! | Rust       | if, if let, while, while let, for, match arms, ?            |
30//! | Go         | if, for, switch cases, select cases                          |
31//! | Java       | if, else if, for, while, catch, switch cases                |
32//! | C/C++      | if, else if, for, while, switch cases                       |
33
34use std::collections::HashMap;
35use std::path::{Path, PathBuf};
36use std::simd::{cmp::SimdPartialOrd, u32x8, Mask, Simd};
37
38use rayon::prelude::*;
39use serde::{Deserialize, Serialize};
40use tracing::debug;
41
42use crate::ast::{AstExtractor, FunctionInfo};
43use crate::callgraph::scanner::{ProjectScanner, ScanConfig};
44use crate::cfg::{CfgBuilder, CFGInfo};
45use crate::error::{Result, BrrrError};
46use crate::lang::LanguageRegistry;
47
48// =============================================================================
49// TYPES
50// =============================================================================
51
52/// Risk level classification based on cyclomatic complexity.
53///
54/// Based on widely accepted thresholds from software engineering research:
55/// - McCabe (1976): Original definition
56/// - NIST (1996): Risk thresholds
57/// - Carnegie Mellon SEI: Industry recommendations
58#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
59#[serde(rename_all = "snake_case")]
60pub enum RiskLevel {
61    /// Complexity 1-10: Simple, low risk, easy to test
62    Low,
63    /// Complexity 11-20: Moderate complexity, consider splitting
64    Medium,
65    /// Complexity 21-50: High complexity, hard to test and maintain
66    High,
67    /// Complexity 50+: Critical, refactor immediately
68    Critical,
69}
70
71impl RiskLevel {
72    /// Classify complexity value into risk level.
73    ///
74    /// # Thresholds
75    ///
76    /// - 1-10: Low risk
77    /// - 11-20: Medium risk
78    /// - 21-50: High risk
79    /// - 50+: Critical risk
80    #[must_use]
81    pub fn from_complexity(complexity: u32) -> Self {
82        match complexity {
83            0..=10 => Self::Low,
84            11..=20 => Self::Medium,
85            21..=50 => Self::High,
86            _ => Self::Critical,
87        }
88    }
89
90    /// Get human-readable description of the risk level.
91    #[must_use]
92    pub const fn description(&self) -> &'static str {
93        match self {
94            Self::Low => "Simple, low risk",
95            Self::Medium => "Moderate complexity, consider refactoring",
96            Self::High => "Complex, hard to test and maintain",
97            Self::Critical => "Critical complexity, refactor immediately",
98        }
99    }
100
101    /// Get the color code for CLI output (ANSI).
102    #[must_use]
103    pub const fn color_code(&self) -> &'static str {
104        match self {
105            Self::Low => "\x1b[32m",       // Green
106            Self::Medium => "\x1b[33m",    // Yellow
107            Self::High => "\x1b[31m",      // Red
108            Self::Critical => "\x1b[35m",  // Magenta
109        }
110    }
111}
112
113impl std::fmt::Display for RiskLevel {
114    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
115        match self {
116            Self::Low => write!(f, "low"),
117            Self::Medium => write!(f, "medium"),
118            Self::High => write!(f, "high"),
119            Self::Critical => write!(f, "critical"),
120        }
121    }
122}
123
124// =============================================================================
125// SIMD-ACCELERATED RISK LEVEL CLASSIFICATION
126// =============================================================================
127
128/// SIMD-accelerated risk level counting using u32x8 (AVX2 256-bit).
129///
130/// Processes 8 complexity values at a time using parallel threshold comparisons.
131/// This provides significant speedup for large codebases with thousands of functions.
132///
133/// # Algorithm
134///
135/// For each vector of 8 values:
136/// 1. Compare against threshold 10 (Low boundary)
137/// 2. Compare against threshold 20 (Medium boundary)
138/// 3. Compare against threshold 50 (High boundary)
139/// 4. Count values in each category using bitmask operations
140///
141/// # Thresholds
142///
143/// - Low: complexity <= 10
144/// - Medium: 10 < complexity <= 20
145/// - High: 20 < complexity <= 50
146/// - Critical: complexity > 50
147#[derive(Debug, Clone, Copy, Default)]
148pub struct RiskLevelCounts {
149    pub low: usize,
150    pub medium: usize,
151    pub high: usize,
152    pub critical: usize,
153}
154
155impl RiskLevelCounts {
156    /// Count risk levels using SIMD-accelerated parallel comparison.
157    ///
158    /// Uses u32x8 vectors to process 8 complexity values simultaneously.
159    /// Falls back to scalar processing for the remaining tail elements.
160    #[must_use]
161    pub fn count_simd(complexities: &[u32]) -> Self {
162        const LANES: usize = 8;
163
164        // Threshold vectors (broadcast constants)
165        let threshold_low: u32x8 = Simd::splat(10);
166        let threshold_medium: u32x8 = Simd::splat(20);
167        let threshold_high: u32x8 = Simd::splat(50);
168
169        let mut low_count: usize = 0;
170        let mut medium_count: usize = 0;
171        let mut high_count: usize = 0;
172        let mut critical_count: usize = 0;
173
174        let chunks = complexities.len() / LANES;
175        let remainder = complexities.len() % LANES;
176
177        // Process 8 values at a time using SIMD
178        for chunk_idx in 0..chunks {
179            let offset = chunk_idx * LANES;
180            let values = u32x8::from_slice(&complexities[offset..offset + LANES]);
181
182            // Parallel threshold comparisons:
183            // is_low:      value <= 10
184            // is_medium:   value > 10 AND value <= 20
185            // is_high:     value > 20 AND value <= 50
186            // is_critical: value > 50
187
188            let le_10: Mask<i32, 8> = values.simd_le(threshold_low);
189            let le_20: Mask<i32, 8> = values.simd_le(threshold_medium);
190            let le_50: Mask<i32, 8> = values.simd_le(threshold_high);
191
192            // Count bits set in each category mask
193            // Low: <= 10
194            low_count += le_10.to_bitmask().count_ones() as usize;
195
196            // Medium: > 10 AND <= 20 (le_20 AND NOT le_10)
197            let is_medium = le_20 & !le_10;
198            medium_count += is_medium.to_bitmask().count_ones() as usize;
199
200            // High: > 20 AND <= 50 (le_50 AND NOT le_20)
201            let is_high = le_50 & !le_20;
202            high_count += is_high.to_bitmask().count_ones() as usize;
203
204            // Critical: > 50 (NOT le_50)
205            let is_critical = !le_50;
206            critical_count += is_critical.to_bitmask().count_ones() as usize;
207        }
208
209        // Handle tail elements with scalar fallback
210        let tail_start = chunks * LANES;
211        for &c in &complexities[tail_start..tail_start + remainder] {
212            match c {
213                0..=10 => low_count += 1,
214                11..=20 => medium_count += 1,
215                21..=50 => high_count += 1,
216                _ => critical_count += 1,
217            }
218        }
219
220        Self {
221            low: low_count,
222            medium: medium_count,
223            high: high_count,
224            critical: critical_count,
225        }
226    }
227
228    /// Convert counts to a HashMap with string keys for serialization.
229    #[must_use]
230    pub fn to_hashmap(&self) -> HashMap<String, usize> {
231        let mut map = HashMap::with_capacity(4);
232        if self.low > 0 {
233            map.insert("low".to_string(), self.low);
234        }
235        if self.medium > 0 {
236            map.insert("medium".to_string(), self.medium);
237        }
238        if self.high > 0 {
239            map.insert("high".to_string(), self.high);
240        }
241        if self.critical > 0 {
242            map.insert("critical".to_string(), self.critical);
243        }
244        map
245    }
246}
247
248/// Cyclomatic complexity result for a single function.
249#[derive(Debug, Clone, Serialize, Deserialize)]
250pub struct CyclomaticComplexity {
251    /// Function name (may include class prefix for methods)
252    pub function_name: String,
253    /// File path containing the function
254    pub file: PathBuf,
255    /// Starting line number (1-indexed)
256    pub line: usize,
257    /// Ending line number (1-indexed)
258    pub end_line: usize,
259    /// Calculated cyclomatic complexity value
260    pub complexity: u32,
261    /// Risk level classification
262    pub risk_level: RiskLevel,
263    /// Number of decision points detected
264    pub decision_points: u32,
265    /// Number of CFG nodes (basic blocks)
266    pub nodes: usize,
267    /// Number of CFG edges
268    pub edges: usize,
269}
270
271impl CyclomaticComplexity {
272    /// Create complexity result from CFG info.
273    fn from_cfg(cfg: &CFGInfo, file: &Path, line: usize, end_line: usize) -> Self {
274        let complexity = cfg.cyclomatic_complexity() as u32;
275        Self {
276            function_name: cfg.function_name.clone(),
277            file: file.to_path_buf(),
278            line,
279            end_line,
280            complexity,
281            risk_level: RiskLevel::from_complexity(complexity),
282            decision_points: cfg.decision_points as u32,
283            nodes: cfg.blocks.len(),
284            edges: cfg.edges.len(),
285        }
286    }
287}
288
289/// Complexity information for a single function (simplified output).
290#[derive(Debug, Clone, Serialize, Deserialize)]
291pub struct FunctionComplexity {
292    /// Function name
293    pub name: String,
294    /// Starting line number
295    pub line: usize,
296    /// Cyclomatic complexity value
297    pub complexity: u32,
298    /// Risk level
299    pub risk_level: RiskLevel,
300}
301
302impl From<&CyclomaticComplexity> for FunctionComplexity {
303    fn from(cc: &CyclomaticComplexity) -> Self {
304        Self {
305            name: cc.function_name.clone(),
306            line: cc.line,
307            complexity: cc.complexity,
308            risk_level: cc.risk_level,
309        }
310    }
311}
312
313/// Aggregate complexity statistics.
314#[derive(Debug, Clone, Serialize, Deserialize)]
315pub struct ComplexityStats {
316    /// Total number of functions analyzed
317    pub total_functions: usize,
318    /// Average complexity across all functions
319    pub average_complexity: f64,
320    /// Maximum complexity found
321    pub max_complexity: u32,
322    /// Minimum complexity found
323    pub min_complexity: u32,
324    /// Median complexity
325    pub median_complexity: u32,
326    /// Distribution by risk level
327    pub risk_distribution: HashMap<String, usize>,
328    /// Complexity histogram buckets (1-5, 6-10, 11-15, ...)
329    pub histogram: Vec<HistogramBucket>,
330}
331
332/// Histogram bucket for complexity distribution.
333#[derive(Debug, Clone, Serialize, Deserialize)]
334pub struct HistogramBucket {
335    /// Lower bound (inclusive)
336    pub min: u32,
337    /// Upper bound (inclusive)
338    pub max: u32,
339    /// Label for display
340    pub label: String,
341    /// Number of functions in this bucket
342    pub count: usize,
343}
344
345impl ComplexityStats {
346    /// Calculate statistics from a list of complexity values.
347    fn from_complexities(complexities: &[u32]) -> Self {
348        if complexities.is_empty() {
349            return Self {
350                total_functions: 0,
351                average_complexity: 0.0,
352                max_complexity: 0,
353                min_complexity: 0,
354                median_complexity: 0,
355                risk_distribution: HashMap::new(),
356                histogram: Vec::new(),
357            };
358        }
359
360        let total = complexities.len();
361        let sum: u64 = complexities.iter().map(|&c| u64::from(c)).sum();
362        let average = sum as f64 / total as f64;
363
364        let max = *complexities.iter().max().unwrap_or(&0);
365        let min = *complexities.iter().min().unwrap_or(&0);
366
367        // Calculate median
368        let mut sorted = complexities.to_vec();
369        sorted.sort_unstable();
370        let median = if total % 2 == 0 {
371            (sorted[total / 2 - 1] + sorted[total / 2]) / 2
372        } else {
373            sorted[total / 2]
374        };
375
376        // Risk distribution using SIMD-accelerated parallel classification
377        let risk_counts = RiskLevelCounts::count_simd(complexities);
378        let risk_distribution = risk_counts.to_hashmap();
379
380        // Build histogram (buckets of 5)
381        let histogram = Self::build_histogram(complexities, max);
382
383        Self {
384            total_functions: total,
385            average_complexity: average,
386            max_complexity: max,
387            min_complexity: min,
388            median_complexity: median,
389            risk_distribution,
390            histogram,
391        }
392    }
393
394    /// Build histogram with buckets of 5.
395    fn build_histogram(complexities: &[u32], max: u32) -> Vec<HistogramBucket> {
396        let bucket_size = 5u32;
397        let num_buckets = ((max / bucket_size) + 1) as usize;
398
399        let mut buckets = Vec::with_capacity(num_buckets.min(20)); // Cap at 20 buckets
400
401        for i in 0..num_buckets.min(20) {
402            let min_val = (i as u32) * bucket_size + 1;
403            let max_val = min_val + bucket_size - 1;
404            let count = complexities.iter()
405                .filter(|&&c| c >= min_val && c <= max_val)
406                .count();
407
408            buckets.push(HistogramBucket {
409                min: min_val,
410                max: max_val,
411                label: format!("{}-{}", min_val, max_val),
412                count,
413            });
414        }
415
416        // Handle overflow bucket for 100+
417        if max > 100 {
418            let overflow_count = complexities.iter().filter(|&&c| c > 100).count();
419            if overflow_count > 0 {
420                buckets.push(HistogramBucket {
421                    min: 101,
422                    max: u32::MAX,
423                    label: "100+".to_string(),
424                    count: overflow_count,
425                });
426            }
427        }
428
429        buckets
430    }
431}
432
433/// Complete complexity analysis result for a file or project.
434#[derive(Debug, Clone, Serialize, Deserialize)]
435pub struct ComplexityAnalysis {
436    /// Path that was analyzed (file or directory)
437    pub path: PathBuf,
438    /// Language filter applied (if any)
439    pub language: Option<String>,
440    /// Individual function complexities
441    pub functions: Vec<CyclomaticComplexity>,
442    /// Functions exceeding threshold (if threshold was specified)
443    #[serde(skip_serializing_if = "Option::is_none")]
444    pub violations: Option<Vec<CyclomaticComplexity>>,
445    /// Aggregate statistics
446    pub stats: ComplexityStats,
447    /// Threshold used for filtering (if any)
448    #[serde(skip_serializing_if = "Option::is_none")]
449    pub threshold: Option<u32>,
450    /// Files that could not be analyzed
451    #[serde(skip_serializing_if = "Vec::is_empty")]
452    pub errors: Vec<AnalysisError>,
453}
454
455/// Error encountered during complexity analysis.
456#[derive(Debug, Clone, Serialize, Deserialize)]
457pub struct AnalysisError {
458    /// File path where error occurred
459    pub file: PathBuf,
460    /// Error message
461    pub message: String,
462}
463
464// =============================================================================
465// ANALYSIS FUNCTIONS
466// =============================================================================
467
468/// Analyze cyclomatic complexity for a project or directory.
469///
470/// Scans all source files in the directory, extracts functions, and calculates
471/// cyclomatic complexity for each function using CFG analysis.
472///
473/// # Arguments
474///
475/// * `path` - Path to file or directory to analyze
476/// * `language` - Optional language filter (e.g., "python", "typescript")
477/// * `threshold` - Optional complexity threshold for filtering results
478///
479/// # Returns
480///
481/// Complete analysis result with individual complexities and statistics.
482///
483/// # Errors
484///
485/// Returns error if:
486/// - Path does not exist
487/// - No source files found
488/// - All files failed to parse
489///
490/// # Example
491///
492/// ```ignore
493/// use go_brrr::metrics::analyze_complexity;
494///
495/// // Analyze all Python files, report functions with complexity > 10
496/// let result = analyze_complexity("./src", Some("python"), Some(10))?;
497///
498/// for func in &result.functions {
499///     println!("{}: {} ({})", func.function_name, func.complexity, func.risk_level);
500/// }
501/// ```
502pub fn analyze_complexity(
503    path: impl AsRef<Path>,
504    language: Option<&str>,
505    threshold: Option<u32>,
506) -> Result<ComplexityAnalysis> {
507    let path = path.as_ref();
508
509    if !path.exists() {
510        return Err(BrrrError::Io(std::io::Error::new(
511            std::io::ErrorKind::NotFound,
512            format!("Path not found: {}", path.display()),
513        )));
514    }
515
516    if path.is_file() {
517        return analyze_file_complexity(path, threshold);
518    }
519
520    // Directory analysis - scan for source files
521    let path_str = path.to_str().ok_or_else(|| {
522        BrrrError::InvalidArgument("Invalid path encoding".to_string())
523    })?;
524
525    let scanner = ProjectScanner::new(path_str)?;
526
527    let config = if let Some(lang) = language {
528        ScanConfig::for_language(lang)
529    } else {
530        ScanConfig::default()
531    };
532
533    let scan_result = scanner.scan_with_config(&config)?;
534
535    if scan_result.files.is_empty() {
536        return Err(BrrrError::InvalidArgument(format!(
537            "No source files found in {} (filter: {:?})",
538            path.display(),
539            language
540        )));
541    }
542
543    debug!("Analyzing {} files for complexity", scan_result.files.len());
544
545    // Analyze files in parallel
546    let results: Vec<(Vec<CyclomaticComplexity>, Vec<AnalysisError>)> = scan_result
547        .files
548        .par_iter()
549        .map(|file| analyze_file_functions(file, threshold))
550        .collect();
551
552    // Aggregate results
553    let mut all_functions = Vec::new();
554    let mut all_errors = Vec::new();
555
556    for (functions, errors) in results {
557        all_functions.extend(functions);
558        all_errors.extend(errors);
559    }
560
561    // Calculate statistics
562    let complexities: Vec<u32> = all_functions.iter().map(|f| f.complexity).collect();
563    let stats = ComplexityStats::from_complexities(&complexities);
564
565    // Filter violations if threshold specified
566    let violations = threshold.map(|t| {
567        all_functions
568            .iter()
569            .filter(|f| f.complexity > t)
570            .cloned()
571            .collect::<Vec<_>>()
572    });
573
574    Ok(ComplexityAnalysis {
575        path: path.to_path_buf(),
576        language: language.map(String::from),
577        functions: all_functions,
578        violations,
579        stats,
580        threshold,
581        errors: all_errors,
582    })
583}
584
585/// Analyze cyclomatic complexity for a single file.
586///
587/// Extracts all functions from the file and calculates complexity for each.
588///
589/// # Arguments
590///
591/// * `file` - Path to source file
592/// * `threshold` - Optional complexity threshold for filtering
593///
594/// # Returns
595///
596/// Analysis result with function complexities and statistics.
597pub fn analyze_file_complexity(
598    file: impl AsRef<Path>,
599    threshold: Option<u32>,
600) -> Result<ComplexityAnalysis> {
601    let file = file.as_ref();
602
603    if !file.exists() {
604        return Err(BrrrError::Io(std::io::Error::new(
605            std::io::ErrorKind::NotFound,
606            format!("File not found: {}", file.display()),
607        )));
608    }
609
610    if !file.is_file() {
611        return Err(BrrrError::InvalidArgument(format!(
612            "Expected a file, got directory: {}",
613            file.display()
614        )));
615    }
616
617    let (functions, errors) = analyze_file_functions(file, threshold);
618
619    let complexities: Vec<u32> = functions.iter().map(|f| f.complexity).collect();
620    let stats = ComplexityStats::from_complexities(&complexities);
621
622    let violations = threshold.map(|t| {
623        functions
624            .iter()
625            .filter(|f| f.complexity > t)
626            .cloned()
627            .collect::<Vec<_>>()
628    });
629
630    // Detect language from file extension
631    let registry = LanguageRegistry::global();
632    let language = registry
633        .detect_language(file)
634        .map(|l| l.name().to_string());
635
636    Ok(ComplexityAnalysis {
637        path: file.to_path_buf(),
638        language,
639        functions,
640        violations,
641        stats,
642        threshold,
643        errors,
644    })
645}
646
647/// Internal function to analyze all functions in a file.
648///
649/// Returns a tuple of (successful analyses, errors encountered).
650fn analyze_file_functions(
651    file: &Path,
652    _threshold: Option<u32>,
653) -> (Vec<CyclomaticComplexity>, Vec<AnalysisError>) {
654    let mut results = Vec::new();
655    let mut errors = Vec::new();
656
657    // Extract module info to get function list
658    let module = match AstExtractor::extract_file(file) {
659        Ok(m) => m,
660        Err(e) => {
661            errors.push(AnalysisError {
662                file: file.to_path_buf(),
663                message: format!("Failed to parse file: {}", e),
664            });
665            return (results, errors);
666        }
667    };
668
669    let file_str = match file.to_str() {
670        Some(s) => s,
671        None => {
672            errors.push(AnalysisError {
673                file: file.to_path_buf(),
674                message: "Invalid file path encoding".to_string(),
675            });
676            return (results, errors);
677        }
678    };
679
680    // Analyze top-level functions
681    for func in &module.functions {
682        let start_line = func.line_number;
683        let end_line = func.end_line_number.unwrap_or(start_line);
684        match analyze_function(file_str, &func.name, start_line, end_line) {
685            Ok(complexity) => results.push(complexity),
686            Err(e) => {
687                debug!("Failed to analyze function {}: {}", func.name, e);
688                // Fall back to simple estimation from function info
689                if let Some(complexity) = estimate_complexity_from_function(func, file) {
690                    results.push(complexity);
691                }
692            }
693        }
694    }
695
696    // Analyze class methods
697    for class in &module.classes {
698        for method in &class.methods {
699            let qualified_name = format!("{}.{}", class.name, method.name);
700            let start_line = method.line_number;
701            let end_line = method.end_line_number.unwrap_or(start_line);
702            match analyze_function(file_str, &qualified_name, start_line, end_line) {
703                Ok(mut complexity) => {
704                    // Keep the qualified name for methods
705                    complexity.function_name = qualified_name;
706                    results.push(complexity);
707                }
708                Err(e) => {
709                    debug!("Failed to analyze method {}: {}", qualified_name, e);
710                    if let Some(mut complexity) = estimate_complexity_from_function(method, file) {
711                        complexity.function_name = qualified_name;
712                        results.push(complexity);
713                    }
714                }
715            }
716        }
717    }
718
719    (results, errors)
720}
721
722/// Analyze a single function using CFG extraction.
723fn analyze_function(
724    file: &str,
725    function_name: &str,
726    start_line: usize,
727    end_line: usize,
728) -> Result<CyclomaticComplexity> {
729    let cfg = CfgBuilder::extract_from_file(file, function_name)?;
730    Ok(CyclomaticComplexity::from_cfg(&cfg, Path::new(file), start_line, end_line))
731}
732
733/// Estimate complexity from function info when CFG extraction fails.
734///
735/// This is a fallback that provides approximate complexity based on
736/// function structure (nesting depth, parameters, etc.).
737fn estimate_complexity_from_function(func: &FunctionInfo, file: &Path) -> Option<CyclomaticComplexity> {
738    // Simple heuristic: base complexity of 1
739    // Real implementation would count decision keywords in source
740    let base_complexity = 1u32;
741    let start_line = func.line_number;
742    let end_line = func.end_line_number.unwrap_or(start_line);
743
744    Some(CyclomaticComplexity {
745        function_name: func.name.clone(),
746        file: file.to_path_buf(),
747        line: start_line,
748        end_line,
749        complexity: base_complexity,
750        risk_level: RiskLevel::from_complexity(base_complexity),
751        decision_points: 0,
752        nodes: 0,
753        edges: 0,
754    })
755}
756
757// =============================================================================
758// ADDITIONAL COMPLEXITY CALCULATIONS
759// =============================================================================
760
761/// Calculate complexity using the graph formula: M = E - N + 2P.
762///
763/// This is the classic McCabe formula where:
764/// - E = number of edges in the control flow graph
765/// - N = number of nodes (basic blocks)
766/// - P = number of connected components (usually 1 for a single function)
767///
768/// # Note
769///
770/// This may give slightly different results than the decision-point method
771/// due to unreachable code blocks and graph structure variations.
772#[allow(dead_code)]
773pub fn calculate_graph_complexity(cfg: &CFGInfo) -> u32 {
774    cfg.cyclomatic_complexity_graph() as u32
775}
776
777/// Count additional decision points from boolean operators.
778///
779/// Boolean operators (&& and ||) in conditions create additional paths
780/// due to short-circuit evaluation. This function counts them in
781/// condition expressions.
782///
783/// # Decision Points from Boolean Operators
784///
785/// ```text
786/// if (a && b)    // 2 decision points: a, then b (if a is true)
787/// if (a || b)    // 2 decision points: a, then b (if a is false)
788/// if (a && b && c)  // 3 decision points
789/// ```
790#[allow(dead_code)]
791pub fn count_boolean_operators(condition: &str) -> u32 {
792    let and_count = condition.matches("&&").count() as u32;
793    let or_count = condition.matches("||").count() as u32;
794
795    // Python uses 'and' and 'or'
796    let py_and_count = count_word_boundary("and", condition) as u32;
797    let py_or_count = count_word_boundary("or", condition) as u32;
798
799    and_count + or_count + py_and_count + py_or_count
800}
801
802/// Count occurrences of a word at word boundaries.
803fn count_word_boundary(word: &str, text: &str) -> usize {
804    let word_len = word.len();
805    let text_bytes = text.as_bytes();
806    let word_bytes = word.as_bytes();
807    let mut count = 0;
808
809    for i in 0..=text.len().saturating_sub(word_len) {
810        if &text_bytes[i..i + word_len] == word_bytes {
811            let before_ok = i == 0 || !text_bytes[i - 1].is_ascii_alphanumeric();
812            let after_ok = i + word_len >= text.len()
813                || !text_bytes[i + word_len].is_ascii_alphanumeric();
814
815            if before_ok && after_ok {
816                count += 1;
817            }
818        }
819    }
820
821    count
822}
823
824// =============================================================================
825// TESTS
826// =============================================================================
827
828#[cfg(test)]
829mod tests {
830    use super::*;
831    use std::io::Write;
832    use tempfile::NamedTempFile;
833
834    fn create_temp_file(content: &str, extension: &str) -> NamedTempFile {
835        let mut file = tempfile::Builder::new()
836            .suffix(extension)
837            .tempfile()
838            .expect("Failed to create temp file");
839        file.write_all(content.as_bytes())
840            .expect("Failed to write to temp file");
841        file
842    }
843
844    #[test]
845    fn test_risk_level_classification() {
846        assert_eq!(RiskLevel::from_complexity(1), RiskLevel::Low);
847        assert_eq!(RiskLevel::from_complexity(10), RiskLevel::Low);
848        assert_eq!(RiskLevel::from_complexity(11), RiskLevel::Medium);
849        assert_eq!(RiskLevel::from_complexity(20), RiskLevel::Medium);
850        assert_eq!(RiskLevel::from_complexity(21), RiskLevel::High);
851        assert_eq!(RiskLevel::from_complexity(50), RiskLevel::High);
852        assert_eq!(RiskLevel::from_complexity(51), RiskLevel::Critical);
853        assert_eq!(RiskLevel::from_complexity(100), RiskLevel::Critical);
854    }
855
856    #[test]
857    fn test_risk_level_display() {
858        assert_eq!(RiskLevel::Low.to_string(), "low");
859        assert_eq!(RiskLevel::Medium.to_string(), "medium");
860        assert_eq!(RiskLevel::High.to_string(), "high");
861        assert_eq!(RiskLevel::Critical.to_string(), "critical");
862    }
863
864    #[test]
865    fn test_simple_function_complexity() {
866        let source = r#"
867def simple():
868    return 42
869"#;
870        let file = create_temp_file(source, ".py");
871        let result = analyze_file_complexity(file.path(), None);
872
873        assert!(result.is_ok(), "Analysis should succeed");
874        let analysis = result.unwrap();
875
876        assert_eq!(analysis.functions.len(), 1);
877        assert_eq!(analysis.functions[0].function_name, "simple");
878        assert_eq!(analysis.functions[0].complexity, 1);
879        assert_eq!(analysis.functions[0].risk_level, RiskLevel::Low);
880    }
881
882    #[test]
883    fn test_if_statement_complexity() {
884        let source = r#"
885def with_if(x):
886    if x > 0:
887        return 1
888    return 0
889"#;
890        let file = create_temp_file(source, ".py");
891        let result = analyze_file_complexity(file.path(), None);
892
893        assert!(result.is_ok());
894        let analysis = result.unwrap();
895
896        assert_eq!(analysis.functions.len(), 1);
897        assert_eq!(analysis.functions[0].complexity, 2); // 1 decision point + 1
898    }
899
900    #[test]
901    fn test_if_elif_else_complexity() {
902        let source = r#"
903def with_elif(x):
904    if x > 0:
905        return "positive"
906    elif x < 0:
907        return "negative"
908    else:
909        return "zero"
910"#;
911        let file = create_temp_file(source, ".py");
912        let result = analyze_file_complexity(file.path(), None);
913
914        assert!(result.is_ok());
915        let analysis = result.unwrap();
916
917        assert_eq!(analysis.functions.len(), 1);
918        assert_eq!(analysis.functions[0].complexity, 3); // 2 decision points + 1
919    }
920
921    #[test]
922    fn test_loop_complexity() {
923        let source = r#"
924def with_loop(items):
925    total = 0
926    for item in items:
927        total += item
928    return total
929"#;
930        let file = create_temp_file(source, ".py");
931        let result = analyze_file_complexity(file.path(), None);
932
933        assert!(result.is_ok());
934        let analysis = result.unwrap();
935
936        assert_eq!(analysis.functions.len(), 1);
937        assert_eq!(analysis.functions[0].complexity, 2); // 1 loop + 1
938    }
939
940    #[test]
941    fn test_nested_complexity() {
942        let source = r#"
943def nested(x, items):
944    if x > 0:
945        for item in items:
946            if item > x:
947                return item
948    return None
949"#;
950        let file = create_temp_file(source, ".py");
951        let result = analyze_file_complexity(file.path(), None);
952
953        assert!(result.is_ok());
954        let analysis = result.unwrap();
955
956        assert_eq!(analysis.functions.len(), 1);
957        // 3 decision points: if, for, if
958        assert_eq!(analysis.functions[0].complexity, 4);
959    }
960
961    #[test]
962    fn test_class_method_complexity() {
963        let source = r#"
964class Calculator:
965    def add(self, a, b):
966        return a + b
967
968    def smart_divide(self, a, b):
969        if b == 0:
970            return None
971        return a / b
972"#;
973        let file = create_temp_file(source, ".py");
974        let result = analyze_file_complexity(file.path(), None);
975
976        assert!(result.is_ok());
977        let analysis = result.unwrap();
978
979        assert_eq!(analysis.functions.len(), 2);
980
981        // Find the methods by name
982        let add = analysis.functions.iter().find(|f| f.function_name == "Calculator.add");
983        let divide = analysis.functions.iter().find(|f| f.function_name == "Calculator.smart_divide");
984
985        assert!(add.is_some(), "Should find add method");
986        assert!(divide.is_some(), "Should find smart_divide method");
987
988        assert_eq!(add.unwrap().complexity, 1);
989        assert_eq!(divide.unwrap().complexity, 2);
990    }
991
992    #[test]
993    fn test_threshold_filtering() {
994        let source = r#"
995def simple():
996    return 1
997
998def complex_func(x, y, z):
999    if x > 0:
1000        if y > 0:
1001            if z > 0:
1002                return "all positive"
1003            else:
1004                return "z negative"
1005        else:
1006            return "y negative"
1007    else:
1008        return "x negative"
1009"#;
1010        let file = create_temp_file(source, ".py");
1011        let result = analyze_file_complexity(file.path(), Some(2));
1012
1013        assert!(result.is_ok());
1014        let analysis = result.unwrap();
1015
1016        assert!(analysis.violations.is_some());
1017        let violations = analysis.violations.unwrap();
1018
1019        // Only complex_func should be a violation (complexity > 2)
1020        assert_eq!(violations.len(), 1);
1021        assert_eq!(violations[0].function_name, "complex_func");
1022    }
1023
1024    #[test]
1025    fn test_statistics_calculation() {
1026        let complexities = vec![1, 2, 3, 10, 15, 25];
1027        let stats = ComplexityStats::from_complexities(&complexities);
1028
1029        assert_eq!(stats.total_functions, 6);
1030        assert_eq!(stats.min_complexity, 1);
1031        assert_eq!(stats.max_complexity, 25);
1032        assert!((stats.average_complexity - 9.33).abs() < 0.1);
1033
1034        // Check risk distribution
1035        assert_eq!(*stats.risk_distribution.get("low").unwrap_or(&0), 4);
1036        assert_eq!(*stats.risk_distribution.get("medium").unwrap_or(&0), 1);
1037        assert_eq!(*stats.risk_distribution.get("high").unwrap_or(&0), 1);
1038    }
1039
1040    #[test]
1041    fn test_empty_file_analysis() {
1042        let source = "# Just a comment\n";
1043        let file = create_temp_file(source, ".py");
1044        let result = analyze_file_complexity(file.path(), None);
1045
1046        assert!(result.is_ok());
1047        let analysis = result.unwrap();
1048
1049        assert_eq!(analysis.functions.len(), 0);
1050        assert_eq!(analysis.stats.total_functions, 0);
1051    }
1052
1053    #[test]
1054    fn test_boolean_operator_counting() {
1055        assert_eq!(count_boolean_operators("a && b"), 1);
1056        assert_eq!(count_boolean_operators("a || b"), 1);
1057        assert_eq!(count_boolean_operators("a && b && c"), 2);
1058        assert_eq!(count_boolean_operators("a || b && c"), 2);
1059
1060        // Python operators
1061        assert_eq!(count_boolean_operators("a and b"), 1);
1062        assert_eq!(count_boolean_operators("a or b"), 1);
1063        assert_eq!(count_boolean_operators("a and b and c"), 2);
1064
1065        // Should not match word fragments
1066        assert_eq!(count_boolean_operators("android"), 0);
1067        assert_eq!(count_boolean_operators("valor"), 0);
1068    }
1069
1070    #[test]
1071    fn test_word_boundary_matching() {
1072        assert_eq!(count_word_boundary("and", "a and b"), 1);
1073        assert_eq!(count_word_boundary("and", "android"), 0);
1074        assert_eq!(count_word_boundary("and", "band"), 0);
1075        assert_eq!(count_word_boundary("or", "a or b"), 1);
1076        assert_eq!(count_word_boundary("or", "for"), 0);
1077        assert_eq!(count_word_boundary("or", "order"), 0);
1078    }
1079
1080    #[test]
1081    fn test_try_except_complexity() {
1082        let source = r#"
1083def safe_divide(a, b):
1084    try:
1085        result = a / b
1086    except ZeroDivisionError:
1087        result = 0
1088    except TypeError:
1089        result = None
1090    return result
1091"#;
1092        let file = create_temp_file(source, ".py");
1093        let result = analyze_file_complexity(file.path(), None);
1094
1095        assert!(result.is_ok());
1096        let analysis = result.unwrap();
1097
1098        assert_eq!(analysis.functions.len(), 1);
1099        // 2 exception handlers = 2 decision points
1100        assert_eq!(analysis.functions[0].complexity, 3);
1101    }
1102
1103    #[test]
1104    fn test_typescript_complexity() {
1105        let source = r#"
1106function simple(): number {
1107    return 42;
1108}
1109
1110function withIf(x: number): string {
1111    if (x > 0) {
1112        return "positive";
1113    }
1114    return "non-positive";
1115}
1116"#;
1117        let file = create_temp_file(source, ".ts");
1118        let result = analyze_file_complexity(file.path(), None);
1119
1120        assert!(result.is_ok());
1121        let analysis = result.unwrap();
1122
1123        // Should find both functions
1124        assert_eq!(analysis.functions.len(), 2);
1125    }
1126
1127    #[test]
1128    fn test_nonexistent_file() {
1129        let result = analyze_file_complexity("/nonexistent/path/file.py", None);
1130        assert!(result.is_err());
1131    }
1132
1133    #[test]
1134    fn test_histogram_generation() {
1135        let complexities = vec![1, 2, 3, 5, 6, 7, 10, 11, 15, 20, 25, 30];
1136        let stats = ComplexityStats::from_complexities(&complexities);
1137
1138        // Should have histogram buckets
1139        assert!(!stats.histogram.is_empty());
1140
1141        // First bucket (1-5) should have 4 functions
1142        assert_eq!(stats.histogram[0].count, 4);
1143        assert_eq!(stats.histogram[0].label, "1-5");
1144
1145        // Second bucket (6-10) should have 3 functions
1146        assert_eq!(stats.histogram[1].count, 3);
1147    }
1148
1149    // =========================================================================
1150    // SIMD RISK LEVEL CLASSIFICATION TESTS
1151    // =========================================================================
1152
1153    #[test]
1154    fn test_simd_risk_level_empty() {
1155        let counts = RiskLevelCounts::count_simd(&[]);
1156        assert_eq!(counts.low, 0);
1157        assert_eq!(counts.medium, 0);
1158        assert_eq!(counts.high, 0);
1159        assert_eq!(counts.critical, 0);
1160    }
1161
1162    #[test]
1163    fn test_simd_risk_level_single_element() {
1164        // Test single element in each category
1165        let counts = RiskLevelCounts::count_simd(&[5]);
1166        assert_eq!(counts.low, 1);
1167
1168        let counts = RiskLevelCounts::count_simd(&[15]);
1169        assert_eq!(counts.medium, 1);
1170
1171        let counts = RiskLevelCounts::count_simd(&[30]);
1172        assert_eq!(counts.high, 1);
1173
1174        let counts = RiskLevelCounts::count_simd(&[100]);
1175        assert_eq!(counts.critical, 1);
1176    }
1177
1178    #[test]
1179    fn test_simd_risk_level_boundaries() {
1180        // Test exact boundary values
1181        // Low boundary: <= 10
1182        assert_eq!(RiskLevelCounts::count_simd(&[10]).low, 1);
1183        assert_eq!(RiskLevelCounts::count_simd(&[10]).medium, 0);
1184
1185        // Medium boundary: 11-20
1186        assert_eq!(RiskLevelCounts::count_simd(&[11]).medium, 1);
1187        assert_eq!(RiskLevelCounts::count_simd(&[20]).medium, 1);
1188
1189        // High boundary: 21-50
1190        assert_eq!(RiskLevelCounts::count_simd(&[21]).high, 1);
1191        assert_eq!(RiskLevelCounts::count_simd(&[50]).high, 1);
1192
1193        // Critical boundary: > 50
1194        assert_eq!(RiskLevelCounts::count_simd(&[51]).critical, 1);
1195    }
1196
1197    #[test]
1198    fn test_simd_risk_level_tail_only() {
1199        // Test with less than 8 elements (tail processing only)
1200        let complexities = vec![1, 5, 10, 15, 25, 55];
1201        let counts = RiskLevelCounts::count_simd(&complexities);
1202
1203        assert_eq!(counts.low, 3);      // 1, 5, 10
1204        assert_eq!(counts.medium, 1);   // 15
1205        assert_eq!(counts.high, 1);     // 25
1206        assert_eq!(counts.critical, 1); // 55
1207    }
1208
1209    #[test]
1210    fn test_simd_risk_level_exact_8() {
1211        // Test with exactly 8 elements (one full SIMD vector)
1212        let complexities = vec![1, 5, 10, 15, 20, 25, 50, 100];
1213        let counts = RiskLevelCounts::count_simd(&complexities);
1214
1215        assert_eq!(counts.low, 3);      // 1, 5, 10
1216        assert_eq!(counts.medium, 2);   // 15, 20
1217        assert_eq!(counts.high, 2);     // 25, 50
1218        assert_eq!(counts.critical, 1); // 100
1219    }
1220
1221    #[test]
1222    fn test_simd_risk_level_16_elements() {
1223        // Test with 16 elements (two full SIMD vectors)
1224        let complexities = vec![
1225            1, 2, 3, 4, 5, 6, 7, 8,     // All low (8)
1226            11, 12, 13, 14, 15, 16, 17, 18, // All medium (8)
1227        ];
1228        let counts = RiskLevelCounts::count_simd(&complexities);
1229
1230        assert_eq!(counts.low, 8);
1231        assert_eq!(counts.medium, 8);
1232        assert_eq!(counts.high, 0);
1233        assert_eq!(counts.critical, 0);
1234    }
1235
1236    #[test]
1237    fn test_simd_risk_level_17_elements() {
1238        // Test with 17 elements (two vectors + 1 tail)
1239        let complexities = vec![
1240            1, 2, 3, 4, 5, 6, 7, 10,    // All low (8)
1241            21, 22, 23, 24, 25, 26, 27, 30, // All high (8)
1242            51,                          // Critical (1 tail)
1243        ];
1244        let counts = RiskLevelCounts::count_simd(&complexities);
1245
1246        assert_eq!(counts.low, 8);
1247        assert_eq!(counts.medium, 0);
1248        assert_eq!(counts.high, 8);
1249        assert_eq!(counts.critical, 1);
1250    }
1251
1252    #[test]
1253    fn test_simd_risk_level_large_mixed() {
1254        // Large test with all categories
1255        let mut complexities = Vec::with_capacity(1000);
1256        for i in 0..250 {
1257            complexities.push(i % 10 + 1);   // Low: 1-10
1258        }
1259        for i in 0..250 {
1260            complexities.push(i % 10 + 11);  // Medium: 11-20
1261        }
1262        for i in 0..250 {
1263            complexities.push(i % 30 + 21);  // High: 21-50
1264        }
1265        for _ in 0..250 {
1266            complexities.push(100);          // Critical: 100
1267        }
1268
1269        let counts = RiskLevelCounts::count_simd(&complexities);
1270
1271        assert_eq!(counts.low, 250);
1272        assert_eq!(counts.medium, 250);
1273        assert_eq!(counts.high, 250);
1274        assert_eq!(counts.critical, 250);
1275    }
1276
1277    #[test]
1278    fn test_simd_matches_scalar() {
1279        // Verify SIMD results match scalar implementation
1280        let complexities: Vec<u32> = (1..=100).collect();
1281
1282        let simd_counts = RiskLevelCounts::count_simd(&complexities);
1283
1284        // Scalar reference implementation
1285        let mut low = 0;
1286        let mut medium = 0;
1287        let mut high = 0;
1288        let mut critical = 0;
1289        for &c in &complexities {
1290            match c {
1291                0..=10 => low += 1,
1292                11..=20 => medium += 1,
1293                21..=50 => high += 1,
1294                _ => critical += 1,
1295            }
1296        }
1297
1298        assert_eq!(simd_counts.low, low);
1299        assert_eq!(simd_counts.medium, medium);
1300        assert_eq!(simd_counts.high, high);
1301        assert_eq!(simd_counts.critical, critical);
1302    }
1303
1304    #[test]
1305    fn test_simd_to_hashmap() {
1306        let counts = RiskLevelCounts {
1307            low: 10,
1308            medium: 5,
1309            high: 3,
1310            critical: 1,
1311        };
1312
1313        let map = counts.to_hashmap();
1314
1315        assert_eq!(map.get("low"), Some(&10));
1316        assert_eq!(map.get("medium"), Some(&5));
1317        assert_eq!(map.get("high"), Some(&3));
1318        assert_eq!(map.get("critical"), Some(&1));
1319    }
1320
1321    #[test]
1322    fn test_simd_to_hashmap_omits_zeros() {
1323        let counts = RiskLevelCounts {
1324            low: 5,
1325            medium: 0,
1326            high: 0,
1327            critical: 0,
1328        };
1329
1330        let map = counts.to_hashmap();
1331
1332        assert_eq!(map.get("low"), Some(&5));
1333        assert_eq!(map.get("medium"), None);
1334        assert_eq!(map.get("high"), None);
1335        assert_eq!(map.get("critical"), None);
1336    }
1337}