cqs 1.25.0 - Docs.rs

//! Core impact analysis — caller discovery, test mapping, test suggestions

use std::collections::{HashMap, HashSet};

use crate::store::{CallerWithContext, SearchResult, StoreError};
use crate::AnalysisError;
use crate::Store;

use super::bfs::reverse_bfs;
use super::types::{
    CallerDetail, ImpactResult, TestInfo, TestSuggestion, TransitiveCaller, TypeImpacted,
};
use super::DEFAULT_MAX_TEST_SEARCH_DEPTH;

use crate::{normalize_path, normalize_slashes};

use std::path::Path;

/// Relativize a path against a root, returning the stripped version.
fn rel_path(path: &Path, root: &Path) -> std::path::PathBuf {
    path.strip_prefix(root).unwrap_or(path).to_path_buf()
}

/// Options for impact analysis.
pub struct ImpactOptions {
    /// Maximum depth for transitive caller discovery (default: 3).
    pub depth: usize,
    /// Whether to include type-impacted functions (default: true).
    pub include_types: bool,
}

impl Default for ImpactOptions {
    fn default() -> Self {
        Self {
            depth: 3,
            include_types: true,
        }
    }
}

/// Run impact analysis: find callers, affected tests, and transitive callers.
/// Paths in the returned result are relative to `root`.
/// When `opts.include_types` is true, also performs one-hop type expansion: finds
/// other functions that share type dependencies with the target via `type_edges`.
pub fn analyze_impact(
    store: &Store,
    target_name: &str,
    root: &Path,
    opts: &ImpactOptions,
) -> Result<ImpactResult, AnalysisError> {
    let _span = tracing::info_span!(
        "analyze_impact",
        target = target_name,
        depth = opts.depth,
        include_types = opts.include_types
    )
    .entered();
    let (callers, mut degraded) = build_caller_info(store, target_name, root)?;
    let graph = store.get_call_graph()?;
    let test_chunks = store.find_test_chunks()?;
    let tests = find_affected_tests_with_chunks(
        &graph,
        &test_chunks,
        target_name,
        DEFAULT_MAX_TEST_SEARCH_DEPTH,
    )
    .into_iter()
    .map(|t| TestInfo {
        file: rel_path(&t.file, root),
        ..t
    })
    .collect();
    let transitive_callers = if opts.depth > 1 {
        find_transitive_callers(store, &graph, target_name, opts.depth, root)?
    } else {
        Vec::new()
    };

    let type_impacted = if opts.include_types {
        match find_type_impacted(store, target_name, root) {
            Ok(ti) => ti,
            Err(e) => {
                tracing::warn!(target = target_name, error = %e, "Failed to compute type-impacted");
                degraded = true;
                Vec::new()
            }
        }
    } else {
        Vec::new()
    };

    Ok(ImpactResult {
        function_name: target_name.to_string(),
        callers,
        tests,
        transitive_callers,
        type_impacted,
        degraded,
    })
}

/// Build caller detail with call-site snippets.
/// Batch-fetches all caller chunks in a single query (via `search_by_names_batch`)
/// to avoid N+1 per-caller `search_by_name` calls.
/// Returns `(callers, degraded)` — `degraded` is true when the batch name search
/// failed and caller snippets may be incomplete.
fn build_caller_info(
    store: &Store,
    target_name: &str,
    root: &Path,
) -> Result<(Vec<CallerDetail>, bool), StoreError> {
    let callers_ctx = store.get_callers_with_context(target_name)?;

    // Batch-fetch chunk data for all unique caller names
    let unique_names: Vec<&str> = {
        let mut seen = HashSet::new();
        callers_ctx
            .iter()
            .filter(|c| seen.insert(c.name.as_str()))
            .map(|c| c.name.as_str())
            .collect()
    };
    let (chunks_by_name, degraded) = match store.search_by_names_batch(&unique_names, 5) {
        Ok(map) => (map, false),
        Err(e) => {
            tracing::warn!(error = %e, "Failed to batch-fetch caller chunks for snippets");
            (HashMap::new(), true)
        }
    };

    let mut callers = Vec::with_capacity(callers_ctx.len());
    for caller in &callers_ctx {
        let snippet = extract_call_snippet_from_cache(&chunks_by_name, caller);
        callers.push(CallerDetail {
            name: caller.name.clone(),
            file: rel_path(&caller.file, root),
            line: caller.line,
            call_line: caller.call_line,
            snippet,
        });
    }

    Ok((callers, degraded))
}

/// Extract a snippet around the call site using pre-fetched chunk data.
/// Prefers non-windowed chunks (correct line offsets) over windowed ones.
pub(super) fn extract_call_snippet_from_cache(
    chunks_by_name: &HashMap<String, Vec<SearchResult>>,
    caller: &CallerWithContext,
) -> Option<String> {
    let results = chunks_by_name.get(&caller.name)?;

    // Prefer non-windowed chunk (correct line offsets)
    let best = {
        let mut best = None;
        for r in results {
            if r.chunk.parent_id.is_none() {
                best = Some(r);
                break;
            }
            if best.is_none() {
                best = Some(r);
            }
        }
        best
    }?;

    // Bounds check: call_line must fall within chunk's line range (windowed chunks
    // may not cover the call site)
    if caller.call_line < best.chunk.line_start || caller.call_line > best.chunk.line_end {
        return None;
    }

    let lines: Vec<&str> = best.chunk.content.lines().collect();
    let offset = caller.call_line.saturating_sub(best.chunk.line_start) as usize;
    if offset < lines.len() {
        let start = offset.saturating_sub(1);
        // Always show 3 lines from start (consistent window regardless of position)
        let end = (start + 3).min(lines.len());
        Some(lines[start..end].join("\n"))
    } else {
        None
    }
}

/// Find tests that exercise `target_name` via call graph traversal.
/// Accepts pre-loaded graph and test chunks — no Store access needed.
/// Used by `onboard` and `task` commands that pre-load shared resources.
pub(crate) fn find_affected_tests_with_chunks(
    graph: &crate::store::CallGraph,
    test_chunks: &[crate::store::ChunkSummary],
    target_name: &str,
    max_depth: usize,
) -> Vec<TestInfo> {
    let ancestors = reverse_bfs(graph, target_name, max_depth);
    let mut tests: Vec<TestInfo> = test_chunks
        .iter()
        .filter_map(|test| {
            ancestors.get(&test.name).and_then(|&d| {
                if d > 0 {
                    Some(TestInfo {
                        name: test.name.clone(),
                        file: test.file.clone(),
                        line: test.line_start,
                        call_depth: d,
                    })
                } else {
                    None
                }
            })
        })
        .collect();
    tests.sort_by_key(|t| t.call_depth);
    tests
}

/// Find transitive callers up to the given depth.
/// Uses `reverse_bfs` to discover all ancestor names in a single graph traversal,
/// then batch-fetches chunk locations with `search_by_names_batch` to avoid N+1 queries.
fn find_transitive_callers(
    store: &Store,
    graph: &crate::store::CallGraph,
    target_name: &str,
    depth: usize,
    root: &Path,
) -> Result<Vec<TransitiveCaller>, StoreError> {
    // Single graph traversal to collect all ancestors + depths
    let ancestors = reverse_bfs(graph, target_name, depth);

    // Filter out the target itself and depth-0 entries
    let caller_entries: Vec<(&str, usize)> = ancestors
        .iter()
        .filter(|(name, &d)| d > 0 && name.as_str() != target_name)
        .map(|(name, &d)| (name.as_str(), d))
        .collect();

    if caller_entries.is_empty() {
        return Ok(Vec::new());
    }

    // Batch-fetch all chunk locations in one query (exact name match, not FTS)
    let names: Vec<&str> = caller_entries.iter().map(|(n, _)| *n).collect();
    let chunks_by_name = store.get_chunks_by_names_batch(&names).unwrap_or_else(|e| {
        tracing::warn!(error = %e, "Failed to batch-fetch transitive caller locations");
        HashMap::new()
    });

    // Build results from batch data
    let mut result = Vec::with_capacity(caller_entries.len());
    for (name, d) in &caller_entries {
        if let Some(chunks) = chunks_by_name.get(*name) {
            if let Some(c) = chunks.first() {
                result.push(TransitiveCaller {
                    name: name.to_string(),
                    file: rel_path(&c.file, root),
                    line: c.line_start,
                    depth: *d,
                });
            }
        }
    }

    Ok(result)
}

/// Suggest tests for untested callers in an impact result.
/// Loads its own call graph and test chunks — only called when `--suggest-tests`
/// is set, so the normal path pays zero overhead.
pub fn suggest_tests(store: &Store, impact: &ImpactResult, root: &Path) -> Vec<TestSuggestion> {
    let _span = tracing::info_span!("suggest_tests", function = %impact.function_name).entered();
    let graph = match store.get_call_graph() {
        Ok(g) => g,
        Err(e) => {
            tracing::warn!(error = %e, "Failed to load call graph for test suggestions");
            return Vec::new();
        }
    };
    let test_chunks = match store.find_test_chunks() {
        Ok(t) => t,
        Err(e) => {
            tracing::warn!(error = %e, "Failed to load test chunks for test suggestions");
            return Vec::new();
        }
    };

    // Batch-fetch file chunks for all unique caller files upfront.
    // This avoids N+1 `get_chunks_by_origin` calls when processing untested callers.
    // Paths in ImpactResult are relative; reconstruct absolute paths for store queries.
    let unique_files: Vec<String> = {
        let mut seen = HashSet::new();
        impact
            .callers
            .iter()
            .filter_map(|c| {
                let abs = root.join(&c.file);
                let f = abs.to_string_lossy().to_string();
                if seen.insert(f.clone()) {
                    Some(f)
                } else {
                    None
                }
            })
            .collect()
    };
    let file_refs: Vec<&str> = unique_files.iter().map(|s| s.as_str()).collect();
    let chunks_by_file = store
        .get_chunks_by_origins_batch(&file_refs)
        .unwrap_or_else(|e| {
            tracing::warn!(error = %e, "Failed to batch-fetch file chunks for test suggestions");
            HashMap::new()
        });

    let mut suggestions = Vec::new();

    for caller in &impact.callers {
        // Check if this caller is reached by ANY test (not just the target's tests).
        // Per-caller BFS is correct here because we need per-caller test status.
        // PF-9 analysis: reverse_bfs_multi_attributed cannot replace this loop because
        // it attributes each ancestor to only one source (the closest). A test reachable
        // from callers A and B at different depths would only be attributed to one,
        // making the other appear untested. Caller count is typically small (direct
        // callers only), so per-caller BFS is acceptable.
        let ancestors = reverse_bfs(&graph, &caller.name, DEFAULT_MAX_TEST_SEARCH_DEPTH);
        let is_tested = test_chunks
            .iter()
            .any(|t| ancestors.get(&t.name).is_some_and(|&d| d > 0));

        if is_tested {
            continue;
        }

        // Use pre-fetched file chunks for inline test check, pattern, and language
        let caller_file_key = root.join(&caller.file).to_string_lossy().to_string();
        let file_chunks = chunks_by_file
            .get(&caller_file_key)
            .map(|v| v.as_slice())
            .unwrap_or(&[]);

        let chunk_is_test = |c: &crate::store::ChunkSummary| {
            crate::is_test_chunk(&c.name, &c.file.to_string_lossy())
        };

        let has_inline_tests = file_chunks.iter().any(chunk_is_test);

        let pattern_source = if has_inline_tests {
            file_chunks
                .iter()
                .find(|c| chunk_is_test(c))
                .map(|c| c.name.clone())
                .unwrap_or_default()
        } else {
            String::new()
        };

        let language = file_chunks.first().map(|c| c.language);

        // EX-18: Generate test name via LanguageDef.test_name_suggestion
        let base_name = caller.name.trim_start_matches("self.");
        let test_name = language
            .and_then(|lang| lang.def().test_name_suggestion)
            .map(|suggest_fn| suggest_fn(base_name))
            .unwrap_or_else(|| format!("test_{base_name}"));

        // Suggest file location
        let caller_file_str = normalize_path(&caller.file);

        let suggested_file = if has_inline_tests {
            std::path::PathBuf::from(&caller_file_str)
        } else {
            std::path::PathBuf::from(suggest_test_file(&caller_file_str))
        };

        suggestions.push(TestSuggestion {
            test_name,
            suggested_file,
            for_function: caller.name.clone(),
            pattern_source,
            inline: has_inline_tests,
        });
    }

    suggestions
}

/// Derive a test file path from a source file path.
/// Uses per-language test file conventions from `LanguageDef::test_file_suggestion`.
/// Falls back to `{parent}/tests/{stem}_test.{ext}` for unknown languages.
fn suggest_test_file(source: &str) -> String {
    let path = std::path::Path::new(source);
    let stem = path
        .file_stem()
        .and_then(|s| s.to_str())
        .unwrap_or("unknown");
    let ext = path.extension().and_then(|s| s.to_str()).unwrap_or("rs");
    let parent = normalize_slashes(path.parent().and_then(|p| p.to_str()).unwrap_or("tests"));

    // Look up language-specific convention via registry
    if let Some(lang_def) = crate::language::REGISTRY.from_extension(ext) {
        if let Some(suggest_fn) = lang_def.test_file_suggestion {
            return suggest_fn(stem, &parent);
        }
    }
    // Fallback for unknown languages
    format!("{parent}/tests/{stem}_test.{ext}")
}

/// One-hop type expansion: find functions that share type dependencies with the target.
/// Uses the type_edges table (not the full TypeGraph) to avoid loading the entire graph
/// when only one function's types are needed. Steps:
/// 1. Get types used by target via `get_types_used_by`
/// 2. Filter out common types (String, Vec, etc.)
/// 3. For each remaining type, find other users via `get_type_users_batch`
/// 4. Aggregate by function name, track which types are shared
fn find_type_impacted(
    store: &Store,
    target_name: &str,
    root: &Path,
) -> Result<Vec<TypeImpacted>, StoreError> {
    let _span = tracing::info_span!("find_type_impacted", target = target_name).entered();

    let type_pairs = store.get_types_used_by(target_name)?;
    let type_names: Vec<String> = type_pairs
        .into_iter()
        .map(|t| t.type_name)
        .filter(|name| !crate::focused_read::COMMON_TYPES.contains(name.as_str()))
        .collect::<std::collections::HashSet<_>>()
        .into_iter()
        .collect();

    if type_names.is_empty() {
        return Ok(Vec::new());
    }

    tracing::debug!(
        type_count = type_names.len(),
        "Type names for impact expansion"
    );

    let refs: Vec<&str> = type_names.iter().map(|s| s.as_str()).collect();
    let results = store.get_type_users_batch(&refs)?;

    // Aggregate: function_name -> set of shared type names (deduplicated)
    let mut shared: HashMap<String, HashSet<String>> = HashMap::new();
    let mut chunk_info: HashMap<String, (std::path::PathBuf, u32)> = HashMap::new();

    for (type_name, chunks) in &results {
        for chunk in chunks {
            if chunk.name == target_name {
                continue;
            }
            if !matches!(
                chunk.chunk_type,
                crate::language::ChunkType::Function | crate::language::ChunkType::Method
            ) {
                continue;
            }
            shared
                .entry(chunk.name.clone())
                .or_default()
                .insert(type_name.clone());
            chunk_info
                .entry(chunk.name.clone())
                .or_insert((chunk.file.clone(), chunk.line_start));
        }
    }

    tracing::debug!(
        impacted_count = shared.len(),
        "Type-impacted functions found"
    );

    // Sort by number of shared types descending
    let mut sorted: Vec<(String, Vec<String>)> = shared
        .into_iter()
        .map(|(name, set)| (name, set.into_iter().collect()))
        .collect();
    sorted.sort_by(|a, b| b.1.len().cmp(&a.1.len()));

    Ok(sorted
        .into_iter()
        .filter_map(|(name, types)| {
            let (file, line) = chunk_info.remove(&name)?;
            Some(TypeImpacted {
                name,
                file: rel_path(&file, root),
                line,
                shared_types: types,
            })
        })
        .collect())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_suggest_test_file_rust() {
        assert_eq!(
            suggest_test_file("src/search.rs"),
            "src/tests/search_test.rs"
        );
    }

    #[test]
    fn test_suggest_test_file_python() {
        assert_eq!(suggest_test_file("src/search.py"), "src/test_search.py");
    }

    #[test]
    fn test_suggest_test_file_typescript() {
        assert_eq!(suggest_test_file("src/search.ts"), "src/search.test.ts");
    }

    #[test]
    fn test_suggest_test_file_javascript() {
        assert_eq!(suggest_test_file("src/search.js"), "src/search.test.js");
    }

    #[test]
    fn test_suggest_test_file_go() {
        assert_eq!(suggest_test_file("pkg/search.go"), "pkg/search_test.go");
    }

    #[test]
    fn test_suggest_test_file_java() {
        assert_eq!(suggest_test_file("src/Search.java"), "src/SearchTest.java");
    }

    #[test]
    fn test_find_affected_tests_with_chunks() {
        use crate::store::helpers::CallGraph;
        use crate::store::ChunkSummary;
        use std::collections::HashMap;
        use std::path::PathBuf;

        // Build a call graph: test_foo -> bar -> target
        let mut forward = HashMap::new();
        forward.insert("test_foo".to_string(), vec!["bar".to_string()]);
        forward.insert("bar".to_string(), vec!["target".to_string()]);
        forward.insert("unrelated_test".to_string(), vec!["baz".to_string()]);
        let mut reverse = HashMap::new();
        reverse.insert("target".to_string(), vec!["bar".to_string()]);
        reverse.insert("bar".to_string(), vec!["test_foo".to_string()]);
        reverse.insert("baz".to_string(), vec!["unrelated_test".to_string()]);
        let graph = CallGraph::from_string_maps(forward, reverse);

        let test_chunks = vec![
            ChunkSummary {
                id: "1".into(),
                name: "test_foo".into(),
                file: PathBuf::from("tests/foo.rs"),
                line_start: 10,
                line_end: 20,
                language: crate::parser::Language::Rust,
                chunk_type: crate::parser::ChunkType::Function,
                signature: "fn test_foo()".into(),
                content: String::new(),
                doc: None,
                parent_id: None,
                parent_type_name: None,
                content_hash: String::new(),
                window_idx: None,
            },
            ChunkSummary {
                id: "2".into(),
                name: "unrelated_test".into(),
                file: PathBuf::from("tests/other.rs"),
                line_start: 5,
                line_end: 15,
                language: crate::parser::Language::Rust,
                chunk_type: crate::parser::ChunkType::Function,
                signature: "fn unrelated_test()".into(),
                content: String::new(),
                doc: None,
                parent_id: None,
                parent_type_name: None,
                content_hash: String::new(),
                window_idx: None,
            },
        ];

        let tests = find_affected_tests_with_chunks(&graph, &test_chunks, "target", 5);
        assert_eq!(tests.len(), 1);
        assert_eq!(tests[0].name, "test_foo");
        assert_eq!(tests[0].call_depth, 2); // test_foo -> bar -> target
    }
}