// repotoire 0.3.47
//
// Graph-powered code analysis CLI. 81 detectors for security, architecture, and code quality.
//! Generator/Iterator Misuse detector.
//!
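//! Targets common generator anti-patterns:
//! 1. Single-yield generators - generators with only one yield (unnecessary complexity)
//! 2. Immediate list conversion - generators immediately wrapped in list()
//! 3. Generators in boolean context - always truthy, likely a bug
//!
//! Only pattern 1 is currently detected from the graph; patterns 2 and 3 need
//! source-level (AST) analysis and are not implemented in this module.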

use crate::detectors::base::{Detector, DetectorConfig, DetectorResult};
use crate::graph::GraphClient;
use crate::models::{Finding, Severity};

/// Generator misuse detector
///
/// Flags patterns that indicate misunderstanding of Python generators.
/// The only check currently run by `detect` is:
/// - Single-yield generators that could be regular functions
///
/// The following patterns are described by this module but are not
/// detected by the code in this file:
/// - list(generator) which defeats lazy evaluation
/// - Generators in boolean context (always truthy)
pub struct GeneratorMisuseDetector {
    /// Detector configuration; set to `DetectorConfig::default()` by `new()`.
    config: DetectorConfig,
    /// Maximum findings to report
    max_findings: usize,
}

impl GeneratorMisuseDetector {
    /// Create a new generator misuse detector with the default configuration
    /// and a cap of 100 reported findings.
    pub fn new() -> Self {
        Self {
            config: DetectorConfig::default(),
            max_findings: 100,
        }
    }

    /// Builder-style override of the maximum number of findings to report.
    pub fn with_max_findings(mut self, max: usize) -> Self {
        self.max_findings = max;
        self
    }

    /// Find single-yield generator functions recorded in the graph.
    ///
    /// The `yield_count = 1` filter and the result cap are both applied inside
    /// the query, so the cap counts matching functions only. (Capping the
    /// unfiltered generator list first would silently drop single-yield
    /// generators that sort after the cut-off.)
    ///
    /// # Errors
    ///
    /// Returns any error produced by `graph.execute`.
    fn find_single_yield_generators(&self, graph: &GraphClient) -> anyhow::Result<Vec<Finding>> {
        // Nothing can be reported, so skip the round-trip to the graph.
        if self.max_findings == 0 {
            return Ok(Vec::new());
        }

        // A missing (null) `yield_count` never equals 1, so functions whose
        // yield count was not tracked by the parser are excluded, exactly as
        // they were when the count was coalesced to 0 and filtered in Rust.
        let query = format!(
            r#"
            MATCH (f:Function)
            WHERE f.has_yield = true AND f.yield_count = 1
            RETURN f.qualifiedName AS func_name,
                   f.name AS func_simple_name,
                   f.filePath AS func_file,
                   f.lineStart AS func_line,
                   f.lineEnd AS func_line_end
            ORDER BY f.qualifiedName
            LIMIT {limit}
        "#,
            limit = self.max_findings
        );

        let results = graph.execute(query.as_str())?;

        let mut findings = Vec::new();
        for row in results {
            let func_name = row.get_string("func_name").unwrap_or_default();

            // The qualified name is used for the finding id; skip rows without one.
            if func_name.is_empty() {
                continue;
            }

            let func_simple_name = row.get_string("func_simple_name").unwrap_or_default();
            let file_path = row.get_string("func_file").unwrap_or_default();

            findings.push(self.create_single_yield_finding(
                &func_name,
                &func_simple_name,
                &file_path,
                row.get_i64("func_line"),
                row.get_i64("func_line_end"),
            ));
        }

        Ok(findings)
    }

    /// Build the `Finding` for one single-yield generator.
    ///
    /// * `func_name` - qualified name; used for the finding id and affected node.
    /// * `func_simple_name` - short name shown in the title and description.
    /// * `file_path` - source file; an empty string yields no affected files.
    /// * `line_start` / `line_end` - function line span, when the graph has it.
    fn create_single_yield_finding(
        &self,
        func_name: &str,
        func_simple_name: &str,
        file_path: &str,
        line_start: Option<i64>,
        line_end: Option<i64>,
    ) -> Finding {
        let description = format!(
            "Generator function `{}` has only **one yield statement**.\n\n\
             Single-yield generators add unnecessary complexity:\n\
             - Generator protocol overhead for single value\n\
             - Harder to understand than a simple return\n\
             - May indicate misunderstanding of generators\n\n\
             **Exception:** This is valid for context managers using `@contextmanager`.",
            func_simple_name
        );

        let suggestion = r#"**Option 1: Convert to regular function:**
```python
# Before (generator)
def get_config():
    yield load_config()

# After (regular function)
def get_config():
    return load_config()
```

**Option 2: If intentional (context manager):**
```python
@contextmanager
def managed_resource():
    resource = acquire()
    yield resource  # Single yield is correct here
    release(resource)
```"#;

        Finding {
            id: format!("generator_single_yield_{}", func_name),
            detector: "GeneratorMisuseDetector".to_string(),
            severity: Severity::Low,
            title: format!("Single-yield generator: {}", func_simple_name),
            description,
            affected_nodes: vec![func_name.to_string()],
            affected_files: if file_path.is_empty() {
                vec![]
            } else {
                vec![file_path.to_string()]
            },
            line_start,
            line_end,
            suggested_fix: Some(suggestion.to_string()),
            estimated_effort: Some("Small (15-30 minutes)".to_string()),
            // Decorated context managers are not excluded by the query, so a
            // flagged function may be a legitimate single-yield generator.
            confidence: 0.70,
            tags: vec![
                "generator".to_string(),
                "complexity".to_string(),
                "code_smell".to_string(),
            ],
            metadata: serde_json::json!({
                "pattern_type": "single_yield",
                "function_name": func_simple_name,
                "yield_count": 1,
            }),
        }
    }
}

impl Default for GeneratorMisuseDetector {
    fn default() -> Self {
        Self::new()
    }
}

impl Detector for GeneratorMisuseDetector {
    /// Stable identifier for this detector.
    fn name(&self) -> &'static str {
        "GeneratorMisuseDetector"
    }

    /// One-line summary of what this detector looks for.
    fn description(&self) -> &'static str {
        "Detects generator misuse patterns: single-yield generators, immediate list conversion"
    }

    /// Run the graph-backed checks and collect their findings.
    ///
    /// A failing graph query is logged as a warning and treated as
    /// "no findings" rather than failing the whole detector.
    fn detect(&self, graph: &GraphClient) -> DetectorResult {
        // Only the single-yield check is backed by graph data. Immediate
        // list conversion and boolean-context usage need source-level (AST)
        // analysis, so they are not attempted here.
        let findings = self
            .find_single_yield_generators(graph)
            .unwrap_or_else(|e| {
                tracing::warn!("Failed to find single-yield generators: {}", e);
                Vec::new()
            });

        Ok(findings)
    }

    /// This detector does not depend on other detectors' output.
    fn is_dependent(&self) -> bool {
        false
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built detector reports its registered name and is independent.
    #[test]
    fn test_detector_creation() {
        let fresh = GeneratorMisuseDetector::new();
        let is_independent = !fresh.is_dependent();

        assert_eq!(fresh.name(), "GeneratorMisuseDetector");
        assert!(is_independent);
    }
}