repotoire 0.8.2

Graph-powered code analysis CLI. 110 detectors for security, architecture, bus factor, and code quality.
Documentation
//! Boolean Trap Detector
//!
//! Graph-enhanced detection of calls that pass multiple boolean literals as arguments.
//! Uses the code graph to:
//! - Find the target function's definition and report its parameter names
//! - Count how many call sites share this pattern
//! - Identify whether the function is widely used (higher-impact fix)

use crate::detectors::base::{Detector, DetectorConfig};
use crate::graph::GraphQueryExt;
use crate::models::{deterministic_finding_id, Finding, Severity};
use anyhow::Result;
use regex::Regex;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::LazyLock;
use tracing::info;

/// Matches a call-like token whose argument list contains two *adjacent*
/// boolean literals, e.g. `f(x, true, false)` or `f(True, False)`.
///
/// Both literals are delimited by `\b` so that identifiers which merely start
/// with a boolean word (`falsey`, `true_color`, ...) are not mistaken for a
/// second literal.
static BOOL_ARGS: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"\w+\s*\([^)]*\b(true|false|True|False)\s*,\s*(true|false|True|False)\b")
        .expect("valid regex")
});

/// Matches an identifier followed (after optional whitespace) by `(`.
/// Capture group 1 is the identifier.
static FUNC_CALL: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(\w+)\s*\(").expect("valid regex"));

/// Detector that reports call sites passing two or more adjacent boolean
/// literals (a "boolean trap").
pub struct BooleanTrapDetector {
    /// Not read anywhere in this file (hence the `dead_code` allowance).
    /// NOTE(review): presumably the root of the analyzed repository, populated
    /// by the `detector_new!` constructor — confirm against the macro.
    #[allow(dead_code)] // Part of detector pattern, used for file scanning
    repository_path: PathBuf,
    /// Upper bound on the number of findings `detect` returns; the second
    /// pass stops as soon as this many findings have been produced.
    max_findings: usize,
}

impl BooleanTrapDetector {
    crate::detectors::detector_new!(50);

    /// Extract function name from call
    fn extract_func_name(line: &str) -> Option<String> {
        FUNC_CALL
            .captures(line)
            .and_then(|c| c.get(1).map(|m| m.as_str().to_string()))
    }

    /// Count boolean args in a call
    fn count_bool_args(line: &str) -> usize {
        let bools = ["true", "false", "True", "False"];
        bools.iter().map(|b| line.matches(b).count()).sum()
    }
}

impl Detector for BooleanTrapDetector {
    /// Identifier of this detector.
    fn name(&self) -> &'static str {
        "boolean-trap"
    }

    /// One-line human-readable summary of what the detector reports.
    fn description(&self) -> &'static str {
        "Detects multiple boolean arguments"
    }

    /// File extensions scanned by this detector. `detect` uses this same list
    /// to select files, so the two can never drift apart.
    fn file_extensions(&self) -> &'static [&'static str] {
        &["py", "js", "ts", "jsx", "tsx", "java", "go", "rs"]
    }

    /// Scan all supported files for calls with adjacent boolean literals and
    /// turn them into findings.
    ///
    /// Works in two passes:
    /// 1. Scan file contents line by line, recording every boolean-trap call
    ///    and how often each function name is called that way.
    /// 2. Build one finding per recorded call (up to `max_findings`), adding
    ///    notes from the graph: declared parameters and caller fan-in.
    ///
    /// Severity is `Medium` when a call has more than two booleans or the
    /// function has more than five trap call sites, otherwise `Low`.
    fn detect(
        &self,
        ctx: &crate::detectors::analysis_context::AnalysisContext,
    ) -> Result<Vec<Finding>> {
        let graph = ctx.graph;
        let files = &ctx.as_file_provider();
        let interner = graph.interner();
        let mut findings = vec![];
        let mut func_call_counts: HashMap<String, usize> = HashMap::new();

        // First pass: collect all boolean trap calls and count per function.
        // Tuple layout: (file, 1-based line number, function name, bool count).
        let mut trap_calls: Vec<(PathBuf, u32, String, usize)> = Vec::new();

        for path in files.files_with_extensions(self.file_extensions()) {
            let Some(raw) = files.content(path) else {
                continue;
            };

            // Cheap pre-filter: skip files without any boolean literal.
            let has_bool_literal = ["true", "True", "false", "False"]
                .iter()
                .any(|literal| raw.contains(*literal));

            // Tighter pre-filter: check raw content for the adjacent-boolean
            // pattern before paying for masked_content line scanning.
            if !has_bool_literal || !BOOL_ARGS.is_match(&raw) {
                continue;
            }

            let Some(content) = files.masked_content(path) else {
                continue;
            };

            // Track the previous line instead of materializing every line in
            // a Vec; it is needed for the suppression check only.
            let mut prev_line: Option<&str> = None;
            for (idx, line) in content.lines().enumerate() {
                // `replace` hands back the old value and stores the current
                // line, so the bookkeeping also happens on skipped lines.
                let prev = prev_line.replace(line);
                if crate::detectors::is_line_suppressed(line, prev) {
                    continue;
                }
                if !BOOL_ARGS.is_match(line) {
                    continue;
                }
                let Some(func_name) = Self::extract_func_name(line) else {
                    continue;
                };

                let bool_count = Self::count_bool_args(line);
                *func_call_counts.entry(func_name.clone()).or_default() += 1;

                // Saturate rather than silently wrap on absurdly long files.
                let line_num = u32::try_from(idx + 1).unwrap_or(u32::MAX);
                trap_calls.push((path.to_path_buf(), line_num, func_name, bool_count));
            }
        }

        // Lazily build name→CodeNode ref map (only if there are trap calls).
        // Uses get_functions_shared() to borrow from the shared function list
        // instead of cloning CodeNodes.
        let all_funcs = if trap_calls.is_empty() {
            None
        } else {
            Some(graph.get_functions_shared())
        };
        let func_by_name: Option<HashMap<&str, &crate::graph::store_models::CodeNode>> = all_funcs
            .as_ref()
            .map(|funcs| funcs.iter().map(|f| (f.node_name(interner), f)).collect());

        // Second pass: create findings with graph context.
        for (path, line_num, func_name, bool_count) in trap_calls {
            if findings.len() >= self.max_findings {
                break;
            }

            let call_count = func_call_counts.get(&func_name).copied().unwrap_or(1);

            // Find the function definition in the graph by name.
            let func_def = func_by_name
                .as_ref()
                .and_then(|by_name| by_name.get(func_name.as_str()).copied());

            // Build context notes shown under "Analysis" in the description.
            let mut notes = Vec::new();

            if call_count > 1 {
                notes.push(format!("📊 {} call sites with this pattern", call_count));
            }

            if bool_count > 2 {
                notes.push(format!(
                    "⚠️ {} boolean arguments (very confusing)",
                    bool_count
                ));
            }

            if let Some(def) = func_def {
                if let Some(params_str) = graph
                    .extra_props(def.qualified_name)
                    .and_then(|ep| ep.params)
                    .map(|key| interner.resolve(key).to_string())
                {
                    notes.push(format!("📝 Function params: {}", params_str));
                }
                // Use the fan-in count to avoid cloning caller CodeNodes.
                let fan_in = graph.call_fan_in(def.qn(interner));
                if fan_in > 5 {
                    notes.push(format!(
                        "🔥 Widely used ({} callers) - high impact fix",
                        fan_in
                    ));
                }
            }

            let context_notes = if notes.is_empty() {
                String::new()
            } else {
                format!("\n\n**Analysis:**\n{}", notes.join("\n"))
            };

            // Severity scales with how confusing the call is and how often
            // the pattern repeats.
            let severity = if bool_count > 2 || call_count > 5 {
                Severity::Medium
            } else {
                Severity::Low
            };

            // Language-specific fix suggestion, keyed on the file extension.
            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
            let suggestion = fix_suggestion(ext, &func_name);

            let effort = if call_count > 5 {
                "30 minutes"
            } else {
                "15 minutes"
            };

            let title = format!("Boolean trap: {}({} bools)", func_name, bool_count);
            // Compute the id before `path` and `title` are moved into the finding.
            let id = {
                let file_str = path.to_string_lossy();
                deterministic_finding_id("BooleanTrapDetector", &file_str, line_num, &title)
            };

            findings.push(Finding {
                id,
                detector: "BooleanTrapDetector".to_string(),
                severity,
                title,
                description: format!(
                    "`{}(true, false, ...)` is hard to understand at the call site.{}",
                    func_name, context_notes
                ),
                affected_files: vec![path],
                line_start: Some(line_num),
                line_end: Some(line_num),
                suggested_fix: Some(suggestion),
                estimated_effort: Some(effort.to_string()),
                category: Some("readability".to_string()),
                cwe_id: None,
                why_it_matters: Some(
                    "Boolean traps make APIs confusing and error-prone. \
                     It's easy to swap arguments or forget their meaning."
                        .to_string(),
                ),
                ..Default::default()
            });
        }

        info!(
            "BooleanTrapDetector found {} findings (graph-aware)",
            findings.len()
        );
        Ok(findings)
    }
}

/// Build the fix suggestion for a boolean-trap call in a file with extension
/// `ext` (without the dot), using `func_name` in the example snippet.
///
/// Python gets a keyword-argument example; JavaScript/TypeScript, including
/// the JSX/TSX variants, get an options-object example; every other language
/// gets a generic hint.
fn fix_suggestion(ext: &str, func_name: &str) -> String {
    match ext {
        "py" => format!(
            "Use keyword arguments:\n\
             ```python\n\
             {}(verbose=True, debug=False)\n\
             ```",
            func_name
        ),
        "js" | "ts" | "jsx" | "tsx" => format!(
            "Use an options object:\n\
             ```javascript\n\
             {}({{ verbose: true, debug: false }})\n\
             ```",
            func_name
        ),
        _ => "Use named arguments or an options object.".to_string(),
    }
}

impl crate::detectors::RegisteredDetector for BooleanTrapDetector {
    /// Construct the detector from `init.repo_path` and hand it back as a
    /// shared `Detector` trait object.
    fn create(init: &crate::detectors::DetectorInit) -> std::sync::Arc<dyn Detector> {
        let detector = Self::new(init.repo_path);
        std::sync::Arc::new(detector)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::detectors::analysis_context::AnalysisContext;
    use crate::graph::builder::GraphBuilder;

    /// Run the detector over a single mock file named `caller.py` containing
    /// `source`, backed by an empty graph, and return whatever it reports.
    fn scan_python_caller(source: &'static str) -> Vec<crate::models::Finding> {
        let store = GraphBuilder::new().freeze();
        let detector = BooleanTrapDetector::new("/mock/repo");
        let ctx = AnalysisContext::test_with_mock_files(&store, vec![("caller.py", source)]);
        detector.detect(&ctx).expect("detection should succeed")
    }

    #[test]
    fn test_detects_boolean_trap() {
        let findings = scan_python_caller("def main():\n    process(data, True, False)\n");

        assert!(
            !findings.is_empty(),
            "Should detect boolean trap with True, False arguments"
        );

        let first = &findings[0];
        assert!(
            first.title.contains("Boolean trap"),
            "Title should mention boolean trap, got: {}",
            first.title
        );
    }

    #[test]
    fn test_no_finding_without_multiple_booleans() {
        // A lone boolean argument is not a trap.
        let findings = scan_python_caller("def main():\n    process(data, True)\n");

        assert!(
            findings.is_empty(),
            "Should not flag single boolean argument, but got: {:?}",
            findings.iter().map(|f| &f.title).collect::<Vec<_>>()
        );
    }
}