homeboy 0.74.0 - Docs.rs

//! Duplication detection — find identical and near-identical functions across
//! source files, and duplicated code blocks within a single method.
//!
//! Uses method body hashes from fingerprinting to detect exact duplicates,
//! and structural hashes (identifiers/literals normalized to positional tokens)
//! to detect near-duplicates — functions with identical control flow that differ
//! only in variable names, constant references, or string values.
//!
//! Four outputs:
//! - `detect_duplicates()` → flat `Vec<Finding>` for exact duplicates
//! - `detect_duplicate_groups()` → structured `Vec<DuplicateGroup>` for the fixer
//! - `detect_near_duplicates()` → flat `Vec<Finding>` for structural near-duplicates
//! - `detect_intra_method_duplicates()` → duplicated blocks within a single method

use std::collections::HashMap;

use super::conventions::AuditFinding;
use super::findings::{Finding, Severity};
use super::fingerprint::FileFingerprint;

/// Minimum number of locations for a function to count as duplicated.
const MIN_DUPLICATE_LOCATIONS: usize = 2;

/// A group of files containing an identical function.
///
/// The fixer uses this to keep the canonical copy and remove the rest.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct DuplicateGroup {
    /// The duplicated function name.
    pub function_name: String,
    /// File chosen to keep the function (canonical location).
    pub canonical_file: String,
    /// Files where the duplicate should be removed and replaced with an import.
    pub remove_from: Vec<String>,
}

/// Build grouped duplication data from fingerprints.
///
/// For each group of identical functions, picks a canonical file (shortest
/// path, then alphabetical) and lists the rest as removal targets.
fn build_groups(fingerprints: &[&FileFingerprint]) -> HashMap<(String, String), Vec<String>> {
    let mut hash_groups: HashMap<(String, String), Vec<String>> = HashMap::new();

    for fp in fingerprints {
        for (method_name, body_hash) in &fp.method_hashes {
            hash_groups
                .entry((method_name.clone(), body_hash.clone()))
                .or_default()
                .push(fp.relative_path.clone());
        }
    }

    hash_groups
}

/// Pick the canonical file from a list of locations.
///
/// Heuristics (in order):
/// 1. Files in a `utils/` directory are preferred (already shared)
/// 2. Shortest path (most general module)
/// 3. Alphabetical (deterministic tiebreaker)
fn pick_canonical(locations: &[String]) -> String {
    let mut sorted = locations.to_vec();
    sorted.sort_by(|a, b| {
        let a_utils = a.contains("/utils/") || a.contains("/utils.");
        let b_utils = b.contains("/utils/") || b.contains("/utils.");
        // utils files first
        b_utils
            .cmp(&a_utils)
            // then shortest path
            .then_with(|| a.len().cmp(&b.len()))
            // then alphabetical
            .then_with(|| a.cmp(b))
    });
    sorted[0].clone()
}

/// Detect duplicate groups with canonical file selection.
///
/// Returns structured data the fixer uses to remove duplicates.
pub(crate) fn detect_duplicate_groups(fingerprints: &[&FileFingerprint]) -> Vec<DuplicateGroup> {
    let hash_groups = build_groups(fingerprints);
    let mut groups = Vec::new();

    for ((method_name, _hash), locations) in &hash_groups {
        if locations.len() < MIN_DUPLICATE_LOCATIONS {
            continue;
        }

        let canonical = pick_canonical(locations);
        let mut remove_from: Vec<String> = locations
            .iter()
            .filter(|f| **f != canonical)
            .cloned()
            .collect();
        remove_from.sort();

        groups.push(DuplicateGroup {
            function_name: method_name.clone(),
            canonical_file: canonical,
            remove_from,
        });
    }

    groups.sort_by(|a, b| a.function_name.cmp(&b.function_name));
    groups
}

/// Detect duplicated functions across all fingerprinted files.
///
/// Groups functions by their body hash. When two or more files contain a
/// function with the same name and the same normalized body hash, a finding
/// is emitted for each location.
pub(crate) fn detect_duplicates(fingerprints: &[&FileFingerprint]) -> Vec<Finding> {
    let hash_groups = build_groups(fingerprints);
    let mut findings = Vec::new();

    for ((method_name, _hash), locations) in &hash_groups {
        if locations.len() < MIN_DUPLICATE_LOCATIONS {
            continue;
        }

        let suggestion = format!(
            "Function `{}` has identical body in {} files. \
             Extract to a shared module and import it.",
            method_name,
            locations.len()
        );

        // Emit one finding per file that has the duplicate
        for file in locations {
            let mut also_in_vec: Vec<_> =
                locations.iter().filter(|f| *f != file).cloned().collect();
            also_in_vec.sort();
            let also_in = also_in_vec.join(", ");

            findings.push(Finding {
                convention: "duplication".to_string(),
                severity: Severity::Warning,
                file: file.clone(),
                description: format!("Duplicate function `{}` — also in {}", method_name, also_in),
                suggestion: suggestion.clone(),
                kind: AuditFinding::DuplicateFunction,
            });
        }
    }

    // Sort by file path then description for deterministic output
    findings.sort_by(|a, b| {
        a.file
            .cmp(&b.file)
            .then_with(|| a.description.cmp(&b.description))
    });
    findings
}

// ============================================================================
// Near-Duplicate Detection (structural similarity)
// ============================================================================

/// Names that are too generic to flag as near-duplicates.
/// These appear in many files with completely unrelated implementations.
const GENERIC_NAMES: &[&str] = &[
    "run", "new", "default", "build", "list", "show", "set", "get", "delete", "remove", "clear",
    "create", "update", "status", "search", "find", "read", "write", "rename", "init", "test",
    "fmt", "from", "into", "clone", "drop", "display", "parse", "validate", "execute", "handle",
    "process", "merge", "resolve", "pin", "plan",
];

/// Minimum body line count — skip trivial functions (1-2 line bodies).
/// Functions like `fn default_true() -> bool { true }` are too small
/// to meaningfully refactor into shared code with a parameter.
const MIN_BODY_LINES: usize = 3;

/// Build structural hash groups from fingerprints.
///
/// Groups functions by (name, structural_hash), returning only groups
/// where the exact body hashes differ (otherwise they'd already be caught
/// by the exact-duplicate detector).
fn build_structural_groups(
    fingerprints: &[&FileFingerprint],
) -> HashMap<(String, String), Vec<(String, String)>> {
    // Collect: (fn_name, structural_hash) → [(file, body_hash), ...]
    let mut groups: HashMap<(String, String), Vec<(String, String)>> = HashMap::new();

    for fp in fingerprints {
        for (method_name, struct_hash) in &fp.structural_hashes {
            groups
                .entry((method_name.clone(), struct_hash.clone()))
                .or_default()
                .push((
                    fp.relative_path.clone(),
                    fp.method_hashes
                        .get(method_name)
                        .cloned()
                        .unwrap_or_default(),
                ));
        }
    }

    groups
}

/// Check if a file path looks like a CLI command module.
///
/// Command modules (`src/commands/*.rs`) are expected to have identically-
/// named functions (`run`, `list`, etc.) with completely different bodies.
fn is_command_file(path: &str) -> bool {
    path.contains("/commands/") || path.starts_with("commands/")
}

/// Count the body lines of a function in a file's structural hash data.
///
/// Uses heuristic: count lines in the content between `fn <name>` and the
/// matching closing brace. Returns 0 if function not found or content empty.
fn count_body_lines(fp: &FileFingerprint, method_name: &str) -> usize {
    let pattern = format!("fn {}", method_name);
    let lines: Vec<&str> = fp.content.lines().collect();
    let mut start = None;

    for (i, line) in lines.iter().enumerate() {
        if line.contains(&pattern) {
            start = Some(i);
            break;
        }
    }

    let Some(start_idx) = start else { return 0 };

    let mut brace_depth = 0i32;
    let mut found_open = false;
    for (offset, line) in lines[start_idx..].iter().enumerate() {
        for ch in line.chars() {
            if ch == '{' {
                brace_depth += 1;
                found_open = true;
            } else if ch == '}' {
                brace_depth -= 1;
            }
        }
        if found_open && brace_depth == 0 {
            return offset + 1;
        }
    }

    0
}

/// Detect structural near-duplicates across all fingerprinted files.
///
/// Groups functions by (name, structural_hash). When two or more files
/// contain a function with the same name and the same structural hash
/// but *different* exact body hashes, it means the functions have
/// identical control flow but differ in identifiers/constants.
///
/// Filters out:
/// - Functions already caught by exact-duplicate detection
/// - Generic names (`run`, `list`, `show`, etc.)
/// - Command/core delegation pairs (command module ↔ core module)
/// - Trivial functions (< 3 body lines)
pub(crate) fn detect_near_duplicates(fingerprints: &[&FileFingerprint]) -> Vec<Finding> {
    let structural_groups = build_structural_groups(fingerprints);
    let exact_groups = build_groups(fingerprints);

    // Collect exact-duplicate (name, hash) pairs for exclusion
    let exact_duplicate_names: std::collections::HashSet<String> = exact_groups
        .iter()
        .filter(|(_, locs)| locs.len() >= MIN_DUPLICATE_LOCATIONS)
        .map(|((name, _), _)| name.clone())
        .collect();

    let mut findings = Vec::new();

    for ((method_name, _struct_hash), file_hashes) in &structural_groups {
        // Need at least 2 locations
        if file_hashes.len() < MIN_DUPLICATE_LOCATIONS {
            continue;
        }

        // Skip if already an exact duplicate
        if exact_duplicate_names.contains(method_name) {
            continue;
        }

        // Skip generic names
        if GENERIC_NAMES.contains(&method_name.as_str()) {
            continue;
        }

        // Check that exact hashes actually differ (otherwise exact detection covers it)
        let unique_body_hashes: std::collections::HashSet<&str> =
            file_hashes.iter().map(|(_, h)| h.as_str()).collect();
        if unique_body_hashes.len() < 2 {
            continue;
        }

        let files: Vec<&str> = file_hashes.iter().map(|(f, _)| f.as_str()).collect();

        // Filter: skip if all files are command modules (delegation pattern)
        if files.iter().all(|f| is_command_file(f)) {
            continue;
        }

        // Filter: skip command↔core pairs where one is in commands/ and another in core/
        // These are the delegation pattern — the command calls the core function.
        let has_command = files.iter().any(|f| is_command_file(f));
        let has_non_command = files.iter().any(|f| !is_command_file(f));
        if has_command && has_non_command && files.len() == 2 {
            continue;
        }

        // Filter: skip trivial functions (< MIN_BODY_LINES)
        let body_lines: Vec<usize> = files
            .iter()
            .filter_map(|file_path| {
                fingerprints
                    .iter()
                    .find(|fp| fp.relative_path == *file_path)
                    .map(|fp| count_body_lines(fp, method_name))
            })
            .collect();
        if body_lines.iter().all(|&l| l < MIN_BODY_LINES) {
            continue;
        }

        let suggestion = format!(
            "Function `{}` has identical structure in {} files but different \
             identifiers/constants. Consider extracting shared logic into a \
             parameterized function.",
            method_name,
            files.len()
        );

        for (file, _body_hash) in file_hashes {
            let mut also_in_vec: Vec<&str> = file_hashes
                .iter()
                .filter(|(f, _)| f != file)
                .map(|(f, _)| f.as_str())
                .collect();
            also_in_vec.sort();
            let also_in = also_in_vec.join(", ");

            findings.push(Finding {
                convention: "near-duplication".to_string(),
                severity: Severity::Info,
                file: file.clone(),
                description: format!(
                    "Near-duplicate `{}` — structurally identical to {}",
                    method_name, also_in
                ),
                suggestion: suggestion.clone(),
                kind: AuditFinding::NearDuplicate,
            });
        }
    }

    findings.sort_by(|a, b| {
        a.file
            .cmp(&b.file)
            .then_with(|| a.description.cmp(&b.description))
    });
    findings
}

// ============================================================================
// Intra-Method Duplication Detection
// ============================================================================

/// Minimum number of non-blank, non-comment lines for a block to be
/// considered meaningful. Blocks shorter than this are too trivial to flag.
const MIN_INTRA_BLOCK_LINES: usize = 5;

/// Detect duplicated code blocks within the same method/function.
///
/// For each method in each file, extracts the method body from the file
/// content and uses a sliding window of `MIN_INTRA_BLOCK_LINES` normalized
/// lines. When the same window hash appears at two non-overlapping positions
/// within one method, it means a block of code was copy-pasted (merge
/// artifacts, copy-paste errors, etc.).
pub(crate) fn detect_intra_method_duplicates(fingerprints: &[&FileFingerprint]) -> Vec<Finding> {
    let mut findings = Vec::new();

    for fp in fingerprints {
        if fp.content.is_empty() {
            continue;
        }

        let file_lines: Vec<&str> = fp.content.lines().collect();

        for method_name in &fp.methods {
            let Some((body_start, body_end)) = find_method_body(&file_lines, method_name) else {
                continue;
            };

            // Extract body lines (excluding the opening/closing brace lines)
            if body_start + 1 >= body_end {
                continue;
            }
            let body_lines: Vec<&str> = file_lines[body_start + 1..body_end].to_vec();

            if body_lines.len() < MIN_INTRA_BLOCK_LINES * 2 {
                // Body too short to contain two meaningful duplicate blocks
                continue;
            }

            // Build list of (original_body_index, normalized_text) for non-blank
            // non-comment lines
            let normalized: Vec<(usize, String)> = body_lines
                .iter()
                .enumerate()
                .filter_map(|(i, line)| {
                    let trimmed = line.trim();
                    if trimmed.is_empty() || is_comment_only(trimmed) {
                        None
                    } else {
                        Some((i, normalize_line(trimmed)))
                    }
                })
                .collect();

            if normalized.len() < MIN_INTRA_BLOCK_LINES * 2 {
                continue;
            }

            // Hash each sliding window of MIN_INTRA_BLOCK_LINES consecutive
            // normalized lines. Store (hash, start_body_idx, end_body_idx).
            let mut window_hashes: Vec<(u64, usize, usize)> = Vec::new();

            for win_start in 0..=normalized.len() - MIN_INTRA_BLOCK_LINES {
                let win_end = win_start + MIN_INTRA_BLOCK_LINES;
                let mut hasher = std::hash::DefaultHasher::new();
                for (_, norm_line) in &normalized[win_start..win_end] {
                    std::hash::Hash::hash(norm_line, &mut hasher);
                }
                let hash = std::hash::Hasher::finish(&hasher);

                let orig_start = normalized[win_start].0;
                let orig_end = normalized[win_end - 1].0;

                window_hashes.push((hash, orig_start, orig_end));
            }

            // Group by hash, look for non-overlapping pairs
            let mut hash_positions: HashMap<u64, Vec<(usize, usize)>> = HashMap::new();
            for (hash, start, end) in &window_hashes {
                hash_positions
                    .entry(*hash)
                    .or_default()
                    .push((*start, *end));
            }

            let mut reported = false;

            for positions in hash_positions.values() {
                if reported || positions.len() < 2 {
                    continue;
                }

                let first = positions[0];
                for other in &positions[1..] {
                    // Non-overlapping: second block starts after first block ends
                    if other.0 <= first.1 {
                        continue;
                    }

                    // Extend the match: keep sliding forward while lines match
                    let first_norm_idx = normalized
                        .iter()
                        .position(|(i, _)| *i == first.0)
                        .unwrap_or(0);
                    let other_norm_idx = normalized
                        .iter()
                        .position(|(i, _)| *i == other.0)
                        .unwrap_or(0);

                    let mut match_len = MIN_INTRA_BLOCK_LINES;
                    while first_norm_idx + match_len < normalized.len()
                        && other_norm_idx + match_len < normalized.len()
                        && first_norm_idx + match_len < other_norm_idx
                    {
                        if normalized[first_norm_idx + match_len].1
                            == normalized[other_norm_idx + match_len].1
                        {
                            match_len += 1;
                        } else {
                            break;
                        }
                    }

                    // Convert body-relative line numbers to 1-indexed file lines
                    let first_file_line = body_start + 1 + first.0 + 1;
                    let other_file_line = body_start + 1 + other.0 + 1;

                    findings.push(Finding {
                        convention: "intra-method-duplication".to_string(),
                        severity: Severity::Warning,
                        file: fp.relative_path.clone(),
                        description: format!(
                            "Duplicated block in `{}` — {} identical lines at line {} and line {}",
                            method_name, match_len, first_file_line, other_file_line
                        ),
                        suggestion: format!(
                            "Function `{}` contains a duplicated code block ({} lines). \
                             This is often a merge artifact or copy-paste error. \
                             Remove the duplicate or extract shared logic.",
                            method_name, match_len
                        ),
                        kind: AuditFinding::IntraMethodDuplicate,
                    });
                    reported = true;
                    break;
                }

                if reported {
                    break;
                }
            }
        }
    }

    findings.sort_by(|a, b| {
        a.file
            .cmp(&b.file)
            .then_with(|| a.description.cmp(&b.description))
    });
    findings
}

/// Find the body of a method/function in the file lines.
///
/// Returns `(open_brace_line, close_brace_line)` — the line indices of the
/// opening and closing braces. Searches for `function <name>` or `fn <name>`.
fn find_method_body(lines: &[&str], method_name: &str) -> Option<(usize, usize)> {
    let fn_pattern_php = format!("function {}", method_name);
    let fn_pattern_rust = format!("fn {}", method_name);

    let mut start_line = None;
    for (i, line) in lines.iter().enumerate() {
        if line.contains(&fn_pattern_php) || line.contains(&fn_pattern_rust) {
            start_line = Some(i);
            break;
        }
    }

    let start = start_line?;

    // Find opening brace from the function declaration line
    let mut brace_line = None;
    for (offset, line) in lines[start..].iter().enumerate() {
        if line.contains('{') {
            brace_line = Some(start + offset);
            break;
        }
    }

    let open_line = brace_line?;

    // Track brace depth to find closing brace
    let mut depth = 0i32;
    let mut found_open = false;
    for (i, line) in lines[open_line..].iter().enumerate() {
        for ch in line.chars() {
            if ch == '{' {
                depth += 1;
                found_open = true;
            } else if ch == '}' {
                depth -= 1;
            }
        }
        if found_open && depth == 0 {
            return Some((open_line, open_line + i));
        }
    }

    None
}

/// Check if a line is comment-only (PHP, Rust, or shell style).
fn is_comment_only(trimmed: &str) -> bool {
    trimmed.starts_with("//")
        || trimmed.starts_with("/*")
        || trimmed.starts_with('*')
        || trimmed.starts_with('#')
}

/// Normalize a line for hashing: collapse whitespace, lowercase.
fn normalize_line(line: &str) -> String {
    line.split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .to_lowercase()
}

// ============================================================================
// Parallel Implementation Detection (call-sequence similarity)
// ============================================================================

/// Minimum number of function calls in a method body to consider it for
/// parallel implementation detection. Trivial methods (< 4 calls) are
/// too simple to meaningfully abstract.
const MIN_CALL_COUNT: usize = 4;

/// Minimum Jaccard similarity (|intersection| / |union|) between two
/// call sets to flag as a parallel implementation.
const MIN_JACCARD_SIMILARITY: f64 = 0.5;

/// Minimum longest-common-subsequence ratio to flag as parallel.
/// This captures sequential ordering — two methods that call helpers
/// in the same order score higher than ones that share calls but in
/// a different order.
const MIN_LCS_RATIO: f64 = 0.5;

/// Per-method call sequence extracted from file content.
#[derive(Debug)]
struct MethodCallSequence {
    file: String,
    method: String,
    /// Ordered list of function/method calls made in the body.
    calls: Vec<String>,
}

/// Extract function call names from a code block.
///
/// Matches patterns like `function_name(`, `self.method(`, `Type::method(`.
/// Returns the called name (without receiver/namespace prefix).
fn extract_calls_from_body(body: &str) -> Vec<String> {
    let mut calls = Vec::new();

    for line in body.lines() {
        let trimmed = line.trim();
        // Skip comments
        if trimmed.starts_with("//") || trimmed.starts_with("/*") || trimmed.starts_with('*') {
            continue;
        }

        // Find all `identifier(` patterns
        let chars: Vec<char> = trimmed.chars().collect();
        let mut i = 0;
        while i < chars.len() {
            // Look for `(`
            if chars[i] == '(' && i > 0 {
                // Walk backwards to find the identifier
                let end = i;
                let mut start = i;
                while start > 0 && (chars[start - 1].is_alphanumeric() || chars[start - 1] == '_') {
                    start -= 1;
                }
                if start < end {
                    let name: String = chars[start..end].iter().collect();
                    // Skip language keywords and control flow
                    if !is_keyword(&name) && !name.is_empty() {
                        calls.push(name);
                    }
                }
            }
            i += 1;
        }
    }

    calls
}

/// Check if a name is a language keyword (not a function call).
fn is_keyword(name: &str) -> bool {
    matches!(
        name,
        "if" | "else"
            | "for"
            | "while"
            | "loop"
            | "match"
            | "return"
            | "let"
            | "mut"
            | "const"
            | "fn"
            | "pub"
            | "use"
            | "mod"
            | "struct"
            | "enum"
            | "impl"
            | "trait"
            | "type"
            | "where"
            | "self"
            | "Self"
            | "super"
            | "crate"
            | "as"
            | "in"
            | "ref"
            | "Some"
            | "None"
            | "Ok"
            | "Err"
            | "true"
            | "false"
            | "assert"
            | "assert_eq"
            | "assert_ne"
            | "println"
            | "eprintln"
            | "format"
            | "vec"
            | "todo"
            | "unimplemented"
            | "unreachable"
            | "panic"
            | "dbg"
    )
}

/// Extract per-method call sequences from all fingerprints.
fn extract_call_sequences(fingerprints: &[&FileFingerprint]) -> Vec<MethodCallSequence> {
    let mut sequences = Vec::new();

    for fp in fingerprints {
        if fp.content.is_empty() {
            continue;
        }

        let lines: Vec<&str> = fp.content.lines().collect();

        for method_name in &fp.methods {
            // Skip generic names — they're expected to have similar call patterns
            if GENERIC_NAMES.contains(&method_name.as_str()) {
                continue;
            }

            let Some((body_start, body_end)) = find_method_body(&lines, method_name) else {
                continue;
            };

            if body_start + 1 >= body_end {
                continue;
            }

            let body: String = lines[body_start + 1..body_end].join("\n");
            let calls = extract_calls_from_body(&body);

            if calls.len() >= MIN_CALL_COUNT {
                sequences.push(MethodCallSequence {
                    file: fp.relative_path.clone(),
                    method: method_name.clone(),
                    calls,
                });
            }
        }
    }

    sequences
}

/// Compute Jaccard similarity between two sets.
fn jaccard_similarity(a: &[String], b: &[String]) -> f64 {
    let set_a: std::collections::HashSet<&str> = a.iter().map(|s| s.as_str()).collect();
    let set_b: std::collections::HashSet<&str> = b.iter().map(|s| s.as_str()).collect();

    let intersection = set_a.intersection(&set_b).count();
    let union = set_a.union(&set_b).count();

    if union == 0 {
        0.0
    } else {
        intersection as f64 / union as f64
    }
}

/// Compute longest common subsequence length between two sequences.
fn lcs_length(a: &[String], b: &[String]) -> usize {
    let m = a.len();
    let n = b.len();
    let mut dp = vec![vec![0usize; n + 1]; m + 1];

    for i in 1..=m {
        for j in 1..=n {
            if a[i - 1] == b[j - 1] {
                dp[i][j] = dp[i - 1][j - 1] + 1;
            } else {
                dp[i][j] = dp[i - 1][j].max(dp[i][j - 1]);
            }
        }
    }

    dp[m][n]
}

/// Compute LCS ratio: 2 * LCS / (len(a) + len(b)).
fn lcs_ratio(a: &[String], b: &[String]) -> f64 {
    let total = a.len() + b.len();
    if total == 0 {
        return 0.0;
    }
    2.0 * lcs_length(a, b) as f64 / total as f64
}

/// Detect parallel implementations across files.
///
/// Compares all method pairs (in different files) by their call sequences.
/// When two methods make a similar set of calls in a similar order — but
/// have different names and different exact implementations — they're
/// likely parallel implementations of the same workflow that should be
/// abstracted into a shared parameterized function.
///
/// Filters out:
/// - Methods in the same file
/// - Generic names (run, new, build, etc.)
/// - Methods with fewer than MIN_CALL_COUNT calls
/// - Pairs already caught by exact or near-duplicate detection
/// - Pairs below both similarity thresholds
pub(crate) fn detect_parallel_implementations(fingerprints: &[&FileFingerprint]) -> Vec<Finding> {
    let sequences = extract_call_sequences(fingerprints);

    // Build sets of already-flagged pairs (exact + near duplicates) to avoid double-flagging
    let exact_groups = build_groups(fingerprints);
    let exact_dup_fns: std::collections::HashSet<String> = exact_groups
        .iter()
        .filter(|(_, locs)| locs.len() >= MIN_DUPLICATE_LOCATIONS)
        .map(|((name, _), _)| name.clone())
        .collect();

    let mut findings = Vec::new();
    let mut reported_pairs: std::collections::HashSet<(String, String)> =
        std::collections::HashSet::new();

    for i in 0..sequences.len() {
        for j in (i + 1)..sequences.len() {
            let a = &sequences[i];
            let b = &sequences[j];

            // Skip same file
            if a.file == b.file {
                continue;
            }

            // Skip if same function name (already caught by other detectors)
            if a.method == b.method {
                continue;
            }

            // Skip if either function is an exact duplicate
            if exact_dup_fns.contains(&a.method) || exact_dup_fns.contains(&b.method) {
                continue;
            }

            // Skip already-reported pairs (both directions)
            let pair_key = if a.file < b.file || (a.file == b.file && a.method < b.method) {
                (
                    format!("{}::{}", a.file, a.method),
                    format!("{}::{}", b.file, b.method),
                )
            } else {
                (
                    format!("{}::{}", b.file, b.method),
                    format!("{}::{}", a.file, a.method),
                )
            };
            if reported_pairs.contains(&pair_key) {
                continue;
            }

            let jaccard = jaccard_similarity(&a.calls, &b.calls);
            let lcs = lcs_ratio(&a.calls, &b.calls);

            if jaccard >= MIN_JACCARD_SIMILARITY && lcs >= MIN_LCS_RATIO {
                reported_pairs.insert(pair_key);

                // Find the shared calls for the description
                let set_a: std::collections::HashSet<&str> =
                    a.calls.iter().map(|s| s.as_str()).collect();
                let set_b: std::collections::HashSet<&str> =
                    b.calls.iter().map(|s| s.as_str()).collect();
                let mut shared: Vec<&&str> = set_a.intersection(&set_b).collect();
                shared.sort();
                let shared_preview: String = shared
                    .iter()
                    .take(5)
                    .map(|s| format!("`{}`", s))
                    .collect::<Vec<_>>()
                    .join(", ");
                let extra = if shared.len() > 5 {
                    format!(" (+{} more)", shared.len() - 5)
                } else {
                    String::new()
                };

                let suggestion = format!(
                    "`{}` and `{}` follow the same call pattern (Jaccard: {:.0}%, sequence: {:.0}%). \
                     Consider extracting the shared workflow into a parameterized function.",
                    a.method,
                    b.method,
                    jaccard * 100.0,
                    lcs * 100.0
                );

                // Emit finding for file A
                findings.push(Finding {
                    convention: "parallel-implementation".to_string(),
                    severity: Severity::Info,
                    file: a.file.clone(),
                    description: format!(
                        "Parallel implementation: `{}` has similar call pattern to `{}` in {} — shared calls: {}{}",
                        a.method, b.method, b.file, shared_preview, extra
                    ),
                    suggestion: suggestion.clone(),
                    kind: AuditFinding::ParallelImplementation,
                });

                // Emit finding for file B
                findings.push(Finding {
                    convention: "parallel-implementation".to_string(),
                    severity: Severity::Info,
                    file: b.file.clone(),
                    description: format!(
                        "Parallel implementation: `{}` has similar call pattern to `{}` in {} — shared calls: {}{}",
                        b.method, a.method, a.file, shared_preview, extra
                    ),
                    suggestion,
                    kind: AuditFinding::ParallelImplementation,
                });
            }
        }
    }

    findings.sort_by(|a, b| {
        a.file
            .cmp(&b.file)
            .then_with(|| a.description.cmp(&b.description))
    });
    findings
}

// ============================================================================
// Tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use crate::code_audit::conventions::Language;

    fn make_fingerprint(path: &str, methods: &[&str], hashes: &[(&str, &str)]) -> FileFingerprint {
        make_fingerprint_with_structural(path, methods, hashes, &[])
    }

    fn make_fingerprint_with_structural(
        path: &str,
        methods: &[&str],
        hashes: &[(&str, &str)],
        structural: &[(&str, &str)],
    ) -> FileFingerprint {
        FileFingerprint {
            relative_path: path.to_string(),
            language: Language::Rust,
            methods: methods.iter().map(|s| s.to_string()).collect(),
            method_hashes: hashes
                .iter()
                .map(|(k, v)| (k.to_string(), v.to_string()))
                .collect(),
            structural_hashes: structural
                .iter()
                .map(|(k, v)| (k.to_string(), v.to_string()))
                .collect(),
            ..Default::default()
        }
    }

    #[test]
    fn detects_exact_duplicate() {
        let fp1 = make_fingerprint("src/utils/io.rs", &["is_zero"], &[("is_zero", "abc123")]);
        let fp2 = make_fingerprint(
            "src/utils/validation.rs",
            &["is_zero"],
            &[("is_zero", "abc123")],
        );

        let findings = detect_duplicates(&[&fp1, &fp2]);

        assert_eq!(findings.len(), 2, "Should emit one finding per location");
        assert!(findings
            .iter()
            .all(|f| f.kind == AuditFinding::DuplicateFunction));
        assert!(findings.iter().any(|f| f.file == "src/utils/io.rs"));
        assert!(findings.iter().any(|f| f.file == "src/utils/validation.rs"));
        assert!(findings[0].description.contains("is_zero"));
    }

    #[test]
    fn no_duplicates_different_hashes() {
        let fp1 = make_fingerprint("src/a.rs", &["process"], &[("process", "hash_a")]);
        let fp2 = make_fingerprint("src/b.rs", &["process"], &[("process", "hash_b")]);

        let findings = detect_duplicates(&[&fp1, &fp2]);
        assert!(
            findings.is_empty(),
            "Different hashes should not flag duplicates"
        );
    }

    #[test]
    fn no_duplicates_single_location() {
        let fp = make_fingerprint("src/only.rs", &["unique_fn"], &[("unique_fn", "abc123")]);

        let findings = detect_duplicates(&[&fp]);
        assert!(findings.is_empty(), "Single location is not a duplicate");
    }

    #[test]
    fn three_way_duplicate() {
        let fp1 = make_fingerprint("src/a.rs", &["helper"], &[("helper", "same_hash")]);
        let fp2 = make_fingerprint("src/b.rs", &["helper"], &[("helper", "same_hash")]);
        let fp3 = make_fingerprint("src/c.rs", &["helper"], &[("helper", "same_hash")]);

        let findings = detect_duplicates(&[&fp1, &fp2, &fp3]);

        assert_eq!(findings.len(), 3, "Should flag all 3 locations");
        assert!(findings[0].suggestion.contains("3 files"));
    }

    #[test]
    fn empty_method_hashes_no_findings() {
        let fp1 = make_fingerprint("src/a.rs", &["foo", "bar"], &[]);
        let fp2 = make_fingerprint("src/b.rs", &["foo", "bar"], &[]);

        let findings = detect_duplicates(&[&fp1, &fp2]);
        assert!(
            findings.is_empty(),
            "No hashes means no duplication findings"
        );
    }

    #[test]
    fn mixed_duplicates_and_unique() {
        let fp1 = make_fingerprint(
            "src/a.rs",
            &["shared", "unique_a"],
            &[("shared", "same"), ("unique_a", "hash_a")],
        );
        let fp2 = make_fingerprint(
            "src/b.rs",
            &["shared", "unique_b"],
            &[("shared", "same"), ("unique_b", "hash_b")],
        );

        let findings = detect_duplicates(&[&fp1, &fp2]);

        assert_eq!(findings.len(), 2, "Only 'shared' should be flagged");
        assert!(findings.iter().all(|f| f.description.contains("shared")));
    }

    // ========================================================================
    // DuplicateGroup / canonical selection tests
    // ========================================================================

    #[test]
    fn group_picks_canonical_by_shortest_path() {
        let fp1 = make_fingerprint("src/core/deep/nested/helper.rs", &["foo"], &[("foo", "h1")]);
        let fp2 = make_fingerprint("src/utils.rs", &["foo"], &[("foo", "h1")]);

        let groups = detect_duplicate_groups(&[&fp1, &fp2]);

        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].canonical_file, "src/utils.rs");
        assert_eq!(
            groups[0].remove_from,
            vec!["src/core/deep/nested/helper.rs"]
        );
    }

    #[test]
    fn group_prefers_utils_directory() {
        let fp1 = make_fingerprint("src/core/a.rs", &["shared"], &[("shared", "h1")]);
        let fp2 = make_fingerprint("src/utils/helpers.rs", &["shared"], &[("shared", "h1")]);

        let groups = detect_duplicate_groups(&[&fp1, &fp2]);

        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].canonical_file, "src/utils/helpers.rs");
        assert_eq!(groups[0].remove_from, vec!["src/core/a.rs"]);
    }

    #[test]
    fn group_alphabetical_tiebreaker() {
        let fp1 = make_fingerprint("src/b.rs", &["dup"], &[("dup", "h1")]);
        let fp2 = make_fingerprint("src/a.rs", &["dup"], &[("dup", "h1")]);

        let groups = detect_duplicate_groups(&[&fp1, &fp2]);

        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].canonical_file, "src/a.rs");
    }

    #[test]
    fn group_three_way_has_two_removals() {
        let fp1 = make_fingerprint("src/a.rs", &["f"], &[("f", "h")]);
        let fp2 = make_fingerprint("src/b.rs", &["f"], &[("f", "h")]);
        let fp3 = make_fingerprint("src/c.rs", &["f"], &[("f", "h")]);

        let groups = detect_duplicate_groups(&[&fp1, &fp2, &fp3]);

        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].remove_from.len(), 2);
        assert!(!groups[0].remove_from.contains(&groups[0].canonical_file));
    }

    // ========================================================================
    // Near-duplicate detection tests
    // ========================================================================

    /// Helper to build a fingerprint with content for body-line counting.
    fn make_fp_with_content(
        path: &str,
        content: &str,
        hashes: &[(&str, &str)],
        structural: &[(&str, &str)],
    ) -> FileFingerprint {
        let mut fp = make_fingerprint_with_structural(path, &[], hashes, structural);
        fp.content = content.to_string();
        fp
    }

    #[test]
    fn near_duplicate_detected_when_structural_match_but_exact_differs() {
        // cache_path in two files: same structure, different constants
        let content_a = "fn cache_path() -> Option<PathBuf> {\n    paths::homeboy().ok().map(|p| p.join(CACHE_A))\n}\n";
        let content_b = "fn cache_path() -> Option<PathBuf> {\n    paths::homeboy().ok().map(|p| p.join(CACHE_B))\n}\n";

        let fp1 = make_fp_with_content(
            "src/core/update_check.rs",
            content_a,
            &[("cache_path", "hash_a")],
            &[("cache_path", "SAME_STRUCT")],
        );
        let fp2 = make_fp_with_content(
            "src/core/ext_update_check.rs",
            content_b,
            &[("cache_path", "hash_b")],
            &[("cache_path", "SAME_STRUCT")],
        );

        let findings = detect_near_duplicates(&[&fp1, &fp2]);

        assert_eq!(findings.len(), 2, "Should flag both locations");
        assert!(findings
            .iter()
            .all(|f| f.kind == AuditFinding::NearDuplicate));
        assert!(findings[0].description.contains("cache_path"));
        assert_eq!(findings[0].severity, Severity::Info);
    }

    #[test]
    fn near_duplicate_skips_exact_duplicates() {
        // If exact hashes match, exact-duplicate detector already handles it
        let fp1 = make_fingerprint_with_structural(
            "src/a.rs",
            &["helper"],
            &[("helper", "SAME")],
            &[("helper", "SAME_STRUCT")],
        );
        let fp2 = make_fingerprint_with_structural(
            "src/b.rs",
            &["helper"],
            &[("helper", "SAME")],
            &[("helper", "SAME_STRUCT")],
        );

        let findings = detect_near_duplicates(&[&fp1, &fp2]);
        assert!(findings.is_empty(), "Exact duplicates should be excluded");
    }

    #[test]
    fn near_duplicate_skips_generic_names() {
        let content = "fn run() {\n    do_something();\n    do_more();\n}\n";
        let fp1 = make_fp_with_content(
            "src/core/a.rs",
            content,
            &[("run", "hash_a")],
            &[("run", "SAME_STRUCT")],
        );
        let fp2 = make_fp_with_content(
            "src/core/b.rs",
            content,
            &[("run", "hash_b")],
            &[("run", "SAME_STRUCT")],
        );

        let findings = detect_near_duplicates(&[&fp1, &fp2]);
        assert!(
            findings.is_empty(),
            "'run' is a generic name — should be skipped"
        );
    }

    #[test]
    fn near_duplicate_skips_command_core_pairs() {
        let content = "fn deploy_site() {\n    connect();\n    upload();\n    verify();\n}\n";
        let fp1 = make_fp_with_content(
            "src/commands/deploy.rs",
            content,
            &[("deploy_site", "hash_a")],
            &[("deploy_site", "SAME_STRUCT")],
        );
        let fp2 = make_fp_with_content(
            "src/core/deploy.rs",
            content,
            &[("deploy_site", "hash_b")],
            &[("deploy_site", "SAME_STRUCT")],
        );

        let findings = detect_near_duplicates(&[&fp1, &fp2]);
        assert!(findings.is_empty(), "Command↔core pair should be skipped");
    }

    #[test]
    fn near_duplicate_skips_trivial_functions() {
        // default_true is only 1 line — too trivial to refactor
        let content = "fn default_true() -> bool { true }\n";
        let fp1 = make_fp_with_content(
            "src/core/defaults.rs",
            content,
            &[("default_true", "hash_a")],
            &[("default_true", "SAME_STRUCT")],
        );
        let fp2 = make_fp_with_content(
            "src/core/project.rs",
            content,
            &[("default_true", "hash_b")],
            &[("default_true", "SAME_STRUCT")],
        );

        let findings = detect_near_duplicates(&[&fp1, &fp2]);
        assert!(findings.is_empty(), "Trivial functions should be skipped");
    }

    #[test]
    fn near_duplicate_not_skipped_for_multi_line_core_functions() {
        // Non-trivial functions in core/ (not commands/) SHOULD be flagged
        let content = "fn cache_path() -> Option<PathBuf> {\n    let base = paths::homeboy()?;\n    let file = base.join(FILENAME);\n    Some(file)\n}\n";
        let fp1 = make_fp_with_content(
            "src/core/update.rs",
            content,
            &[("cache_path", "hash_a")],
            &[("cache_path", "SAME_STRUCT")],
        );
        let fp2 = make_fp_with_content(
            "src/core/ext_update.rs",
            content,
            &[("cache_path", "hash_b")],
            &[("cache_path", "SAME_STRUCT")],
        );

        let findings = detect_near_duplicates(&[&fp1, &fp2]);
        assert_eq!(
            findings.len(),
            2,
            "Non-trivial core↔core near-duplicates should be flagged"
        );
    }

    #[test]
    fn near_duplicate_skips_all_command_files() {
        // Multiple command files with same structural hash — normal pattern
        let content = "fn components() {\n    let list = config::list();\n    for item in list {\n        output::print(item);\n    }\n}\n";
        let fp1 = make_fp_with_content(
            "src/commands/fleet.rs",
            content,
            &[("components", "hash_a")],
            &[("components", "SAME_STRUCT")],
        );
        let fp2 = make_fp_with_content(
            "src/commands/project.rs",
            content,
            &[("components", "hash_b")],
            &[("components", "SAME_STRUCT")],
        );

        let findings = detect_near_duplicates(&[&fp1, &fp2]);
        assert!(findings.is_empty(), "All-commands group should be skipped");
    }

    // ========================================================================
    // Intra-method duplication tests
    // ========================================================================

    #[test]
    fn intra_method_detects_duplicated_block() {
        // Simulate a merge artifact: same 5-line block appears twice
        let content = "<?php\nclass PipelineSteps {\n    public function handle_update( $request ) {\n        $config = array();\n        $has_provider = $request->has_param( 'provider' );\n        $has_model = $request->has_param( 'model' );\n        $has_prompt = $request->has_param( 'system_prompt' );\n        $has_disabled = $request->has_param( 'disabled_tools' );\n        $has_key = $request->has_param( 'ai_api_key' );\n\n        if ( $has_provider ) {\n            $config['provider'] = sanitize_text_field( $request->get_param( 'provider' ) );\n        }\n\n        $has_provider = $request->has_param( 'provider' );\n        $has_model = $request->has_param( 'model' );\n        $has_prompt = $request->has_param( 'system_prompt' );\n        $has_disabled = $request->has_param( 'disabled_tools' );\n        $has_key = $request->has_param( 'ai_api_key' );\n\n        if ( $has_provider ) {\n            $config['provider'] = sanitize_text_field( $request->get_param( 'provider' ) );\n        }\n\n        return $config;\n    }\n}\n";

        let mut fp = make_fingerprint(
            "inc/Api/Pipelines/PipelineSteps.php",
            &["handle_update"],
            &[],
        );
        fp.content = content.to_string();

        let findings = detect_intra_method_duplicates(&[&fp]);

        assert!(
            !findings.is_empty(),
            "Should detect duplicated block within handle_update"
        );
        assert!(findings[0].kind == AuditFinding::IntraMethodDuplicate);
        assert!(findings[0].description.contains("handle_update"));
    }

    #[test]
    fn intra_method_no_false_positive_on_unique_code() {
        let content = "<?php\nclass Handler {\n    public function process( $data ) {\n        $name = sanitize_text_field( $data['name'] );\n        $email = sanitize_email( $data['email'] );\n        $phone = sanitize_text_field( $data['phone'] );\n        $address = sanitize_text_field( $data['address'] );\n        $city = sanitize_text_field( $data['city'] );\n\n        $result = $this->save( $name, $email );\n        $this->notify( $result );\n        $this->log_action( $result );\n        $this->update_cache( $result );\n        $this->send_confirmation( $email );\n\n        return $result;\n    }\n}\n";

        let mut fp = make_fingerprint("inc/Handler.php", &["process"], &[]);
        fp.content = content.to_string();

        let findings = detect_intra_method_duplicates(&[&fp]);
        assert!(
            findings.is_empty(),
            "Unique code should not trigger intra-method duplication"
        );
    }

    #[test]
    fn intra_method_skips_short_methods() {
        let content = "fn short() {\n    let a = 1;\n    let b = 2;\n    let c = a + b;\n    println!(\"{}\", c);\n}\n";

        let mut fp = make_fingerprint("src/short.rs", &["short"], &[]);
        fp.content = content.to_string();

        let findings = detect_intra_method_duplicates(&[&fp]);
        assert!(findings.is_empty(), "Short methods should be skipped");
    }

    #[test]
    fn intra_method_rust_function_duplicated_block() {
        let content = "fn process_items(items: &[Item]) -> Vec<Result> {\n    let mut results = Vec::new();\n    let config = load_config();\n    let validator = Validator::new(&config);\n    let processor = Processor::new(&config);\n    let output = processor.run(&items[0]);\n\n    results.push(output);\n\n    let config = load_config();\n    let validator = Validator::new(&config);\n    let processor = Processor::new(&config);\n    let output = processor.run(&items[0]);\n\n    results.push(output);\n\n    results\n}\n";

        let mut fp = make_fingerprint("src/core/pipeline.rs", &["process_items"], &[]);
        fp.content = content.to_string();

        let findings = detect_intra_method_duplicates(&[&fp]);
        assert!(
            !findings.is_empty(),
            "Should detect duplicated block in Rust function"
        );
    }

    #[test]
    fn find_method_body_php() {
        let content =
            "<?php\nclass Foo {\n    public function bar() {\n        return 1;\n    }\n}\n";
        let lines: Vec<&str> = content.lines().collect();
        let result = find_method_body(&lines, "bar");
        assert!(result.is_some());
        let (open, close) = result.unwrap();
        assert!(lines[open].contains('{'));
        assert!(lines[close].contains('}'));
    }

    #[test]
    fn find_method_body_rust() {
        let content = "fn hello() {\n    println!(\"hi\");\n}\n";
        let lines: Vec<&str> = content.lines().collect();
        let result = find_method_body(&lines, "hello");
        assert!(result.is_some());
    }

    #[test]
    fn find_method_body_missing() {
        let content = "fn other() {\n    println!(\"hi\");\n}\n";
        let lines: Vec<&str> = content.lines().collect();
        let result = find_method_body(&lines, "nonexistent");
        assert!(result.is_none());
    }

    // ========================================================================
    // Parallel Implementation Detection tests
    // ========================================================================

    fn make_fingerprint_with_content(
        path: &str,
        methods: &[&str],
        content: &str,
    ) -> FileFingerprint {
        FileFingerprint {
            relative_path: path.to_string(),
            language: Language::Rust,
            methods: methods.iter().map(|s| s.to_string()).collect(),
            content: content.to_string(),
            ..Default::default()
        }
    }

    #[test]
    fn detects_parallel_implementation() {
        let fp1 = make_fingerprint_with_content(
            "src/deploy.rs",
            &["deploy_to_server"],
            "fn deploy_to_server() {\n    validate_component();\n    build_artifact();\n    upload_to_host();\n    run_post_hooks();\n    notify_complete();\n}",
        );
        let fp2 = make_fingerprint_with_content(
            "src/upgrade.rs",
            &["upgrade_on_server"],
            "fn upgrade_on_server() {\n    validate_component();\n    build_artifact();\n    upload_to_host();\n    run_post_hooks();\n    send_notification();\n}",
        );

        let findings = detect_parallel_implementations(&[&fp1, &fp2]);

        assert_eq!(findings.len(), 2, "Should emit one finding per file");
        assert!(findings
            .iter()
            .all(|f| f.kind == AuditFinding::ParallelImplementation));
        assert!(findings.iter().any(|f| f.file == "src/deploy.rs"));
        assert!(findings.iter().any(|f| f.file == "src/upgrade.rs"));
    }

    #[test]
    fn no_parallel_for_unrelated_functions() {
        let fp1 = make_fingerprint_with_content(
            "src/deploy.rs",
            &["deploy_to_server"],
            "fn deploy_to_server() {\n    validate();\n    build();\n    upload();\n    notify();\n}",
        );
        let fp2 = make_fingerprint_with_content(
            "src/parser.rs",
            &["parse_config"],
            "fn parse_config() {\n    read_file();\n    tokenize();\n    parse_ast();\n    validate_schema();\n}",
        );

        let findings = detect_parallel_implementations(&[&fp1, &fp2]);
        assert!(
            findings.is_empty(),
            "Completely different call sets should not flag"
        );
    }

    #[test]
    fn no_parallel_for_same_file() {
        let fp = make_fingerprint_with_content(
            "src/ops.rs",
            &["deploy_op", "upgrade_op"],
            "fn deploy_op() {\n    validate();\n    build();\n    upload();\n    notify();\n}\nfn upgrade_op() {\n    validate();\n    build();\n    upload();\n    notify();\n}",
        );

        let findings = detect_parallel_implementations(&[&fp]);
        assert!(
            findings.is_empty(),
            "Same-file methods should not be flagged as parallel"
        );
    }

    #[test]
    fn no_parallel_for_trivial_methods() {
        let fp1 = make_fingerprint_with_content(
            "src/a.rs",
            &["small_a"],
            "fn small_a() {\n    foo();\n    bar();\n}",
        );
        let fp2 = make_fingerprint_with_content(
            "src/b.rs",
            &["small_b"],
            "fn small_b() {\n    foo();\n    bar();\n}",
        );

        let findings = detect_parallel_implementations(&[&fp1, &fp2]);
        assert!(
            findings.is_empty(),
            "Methods with < MIN_CALL_COUNT calls should be skipped"
        );
    }

    #[test]
    fn no_parallel_for_generic_names() {
        // "run" is in GENERIC_NAMES
        let fp1 = make_fingerprint_with_content(
            "src/a.rs",
            &["run"],
            "fn run() {\n    validate();\n    build();\n    upload();\n    notify();\n}",
        );
        let fp2 = make_fingerprint_with_content(
            "src/b.rs",
            &["execute"],
            "fn execute() {\n    validate();\n    build();\n    upload();\n    notify();\n}",
        );

        // "run" is skipped, so only one method in the pool — no pair to compare
        let findings = detect_parallel_implementations(&[&fp1, &fp2]);
        // Only fp2's "execute" has a valid call sequence; fp1's "run" is filtered
        // So there's only 1 candidate, no pair → no findings
        assert!(findings.is_empty(), "Generic names should be filtered out");
    }

    #[test]
    fn extract_calls_skips_keywords() {
        let body = "if something() {\n    let x = process();\n    for item in list() {\n        handle(item);\n    }\n}";
        let calls = extract_calls_from_body(body);
        assert!(calls.contains(&"something".to_string()));
        assert!(calls.contains(&"process".to_string()));
        assert!(calls.contains(&"list".to_string()));
        assert!(calls.contains(&"handle".to_string()));
        assert!(!calls.contains(&"if".to_string()));
        assert!(!calls.contains(&"for".to_string()));
        assert!(!calls.contains(&"let".to_string()));
    }

    #[test]
    fn jaccard_identical_sets() {
        let a = vec!["foo".to_string(), "bar".to_string()];
        assert!((jaccard_similarity(&a, &a) - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn jaccard_disjoint_sets() {
        let a = vec!["foo".to_string()];
        let b = vec!["bar".to_string()];
        assert!((jaccard_similarity(&a, &b)).abs() < f64::EPSILON);
    }

    #[test]
    fn lcs_identical_sequences() {
        let a = vec!["a".to_string(), "b".to_string(), "c".to_string()];
        assert_eq!(lcs_length(&a, &a), 3);
        assert!((lcs_ratio(&a, &a) - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn lcs_partial_overlap() {
        let a = vec!["a".to_string(), "b".to_string(), "c".to_string()];
        let b = vec!["a".to_string(), "x".to_string(), "c".to_string()];
        assert_eq!(lcs_length(&a, &b), 2); // a, c
    }
}