// cgz 2026.5.5 - Docs.rs

pub mod config;
pub mod db;
pub mod extraction;
pub mod graph;
pub mod installer;
pub mod mcp;
pub mod types;
pub mod watcher;

use anyhow::{anyhow, Context, Result};
use config::{load_config, save_config, CodeGraphConfig};
use db::Database;
use extraction::{detect_language, detect_parse_error, extract_from_source, should_include_file};
use graph::{GraphTraverser, Subgraph};
use sha2::{Digest, Sha256};
use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::path::{Path, PathBuf};
use types::{
    AffectedDebugEntry, AffectedMatchSources, AffectedReport, ContextFileSummary, ContextMatch,
    ContextReport, ContextSymbolSummary, EdgeKind, ExploreRelationship, ExploreReport,
    ExploreSourceFile, ExploreSourceSection, FileLanguageGroup, FileListEntry, FileListFormat,
    FileListOptions, FileListReport, FileRecord, FileTreeEntry, GraphPath, GraphStats, IndexError,
    IndexErrorCategory, IndexResult, Language, Node, NodeEdge, SearchOptions, SearchResult,
};

/// Name of the directory, created directly under the project root, that holds
/// CodeGraph's data for that project.
pub const CODEGRAPH_DIR: &str = ".codegraph";
/// File name of the database stored inside the [`CODEGRAPH_DIR`] directory.
pub const DATABASE_FILE: &str = "codegraph.db";

/// Handle to an initialized CodeGraph project: the project root, its
/// configuration, and the backing database.
///
/// Obtain one with [`CodeGraph::init`] (new project) or [`CodeGraph::open`]
/// (existing project).
pub struct CodeGraph {
    /// Project root directory; the `.codegraph` directory lives directly under it.
    root: PathBuf,
    /// Configuration written by `init` or loaded by `open`.
    config: CodeGraphConfig,
    /// Database located at `<root>/.codegraph/codegraph.db`.
    db: Database,
}

impl CodeGraph {
    pub fn init(root: impl AsRef<Path>) -> Result<Self> {
        let root = root
            .as_ref()
            .canonicalize()
            .unwrap_or_else(|_| root.as_ref().to_path_buf());
        let dir = root.join(CODEGRAPH_DIR);
        if dir.exists() {
            return Err(anyhow!(
                "CodeGraph already initialized in {}",
                root.display()
            ));
        }
        fs::create_dir_all(&dir).with_context(|| format!("creating {}", dir.display()))?;
        let config = CodeGraphConfig::default_for_root(".");
        save_config(&root, &config)?;
        let db = Database::initialize(dir.join(DATABASE_FILE))?;
        Ok(Self { root, config, db })
    }

    /// Opens an existing CodeGraph, starting the lookup at `root`.
    ///
    /// The project root is resolved with `find_nearest_codegraph_root`; the
    /// configuration and database are then loaded from that root.
    ///
    /// # Errors
    ///
    /// Fails when no initialized root is found, or when loading the
    /// configuration or opening the database fails.
    pub fn open(root: impl AsRef<Path>) -> Result<Self> {
        let start: &Path = root.as_ref();
        let Some(root) = find_nearest_codegraph_root(start) else {
            return Err(anyhow!(
                "CodeGraph not initialized in {}",
                start.display()
            ));
        };

        let config = load_config(&root)?;
        let db_path = root.join(CODEGRAPH_DIR).join(DATABASE_FILE);
        let db = Database::open(db_path)?;
        Ok(Self { root, config, db })
    }

    /// Returns the project root directory this graph was initialized or opened with.
    pub fn root(&self) -> &Path {
        &self.root
    }

    /// Returns the configuration held by this graph.
    pub fn config(&self) -> &CodeGraphConfig {
        &self.config
    }

    /// Rebuilds the whole index from scratch.
    ///
    /// Clears the database, indexes every file returned by `scan_files`, then
    /// clears and re-resolves reference edges. Per-file failures are recorded
    /// in the returned [`IndexResult`]; `success` is true only when no file
    /// errored.
    ///
    /// # Errors
    ///
    /// Propagates database, scanning, and indexing errors.
    pub fn index_all(&mut self) -> Result<IndexResult> {
        let started = std::time::Instant::now();
        self.db.clear_all()?;

        let scanned = self.scan_files()?;
        let mut result = IndexResult::default();
        for path in &scanned {
            self.index_changed_file(path, &mut result)?;
        }

        self.db.clear_resolved_reference_edges()?;
        self.db.resolve_references(&self.root)?;

        // Report the edge total from the database once resolution has run,
        // replacing the per-file tally accumulated while indexing.
        result.edges_created = self.db.edge_count()?;
        result.success = result.files_errored == 0;
        result.duration_ms = started.elapsed().as_millis() as i64;
        Ok(result)
    }

    /// Incrementally updates the index to match the files currently on disk.
    ///
    /// * Index entries whose path is no longer produced by `scan_files` are
    ///   deleted.
    /// * Files whose content hash equals the stored hash are skipped.
    /// * All other files are (re)indexed.
    ///
    /// Reference edges are cleared and re-resolved only when something was
    /// deleted or reindexed. Unreadable files are recorded as errors in the
    /// returned [`IndexResult`] and do not abort the run.
    ///
    /// # Errors
    ///
    /// Propagates database, scanning, and indexing errors.
    pub fn sync(&mut self) -> Result<IndexResult> {
        let started = std::time::Instant::now();
        let scanned = self.scan_files()?;
        let on_disk: BTreeSet<String> = scanned.iter().map(|path| normalized_path(path)).collect();

        let mut indexed = BTreeMap::new();
        for record in self.db.get_all_files()? {
            indexed.insert(record.path.clone(), record);
        }

        let mut result = IndexResult::default();
        let mut changed = false;

        // Drop index entries for files that disappeared from the scan.
        for stale in indexed.keys().filter(|key| !on_disk.contains(*key)) {
            self.db.delete_file_index(stale)?;
            result.files_deleted += 1;
            changed = true;
        }

        for path in scanned {
            let key = normalized_path(&path);
            let content = match fs::read_to_string(self.root.join(&path)) {
                Ok(text) => text,
                Err(err) => {
                    push_index_error(
                        &mut result,
                        categorize_read_error(&err),
                        &path,
                        err.to_string(),
                    );
                    continue;
                }
            };

            let hash = content_hash(&content);
            let unchanged =
                matches!(indexed.get(&key), Some(record) if record.content_hash == hash);
            if unchanged {
                result.files_skipped += 1;
                continue;
            }

            self.index_changed_file_with_content(&path, content, Some(hash), &mut result)?;
            changed = true;
        }

        if changed {
            self.db.clear_resolved_reference_edges()?;
            self.db.resolve_references(&self.root)?;
        }

        result.edges_created = self.db.edge_count()?;
        result.success = result.files_errored == 0;
        result.duration_ms = started.elapsed().as_millis() as i64;
        Ok(result)
    }

    /// Returns graph statistics; delegates directly to the database's `stats`.
    pub fn stats(&self) -> Result<GraphStats> {
        self.db.stats()
    }

    /// Searches indexed nodes for `query`; `query` and `options` are passed
    /// unchanged to the database's `search_nodes`.
    pub fn search_nodes(&self, query: &str, options: SearchOptions) -> Result<Vec<SearchResult>> {
        self.db.search_nodes(query, options)
    }

    /// Looks up a node by `id` in the database; the `Option` in the return
    /// type is whatever the database's `get_node` yields for that id.
    pub fn get_node(&self, id: &str) -> Result<Option<Node>> {
        self.db.get_node(id)
    }

    /// Returns the caller edges of `node_id`, delegating to
    /// `GraphTraverser::get_callers` over this graph's database with
    /// `max_depth` passed through unchanged.
    pub fn get_callers(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
        GraphTraverser::new(&self.db).get_callers(node_id, max_depth)
    }

    /// Returns the callee edges of `node_id`, delegating to
    /// `GraphTraverser::get_callees` over this graph's database with
    /// `max_depth` passed through unchanged.
    pub fn get_callees(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
        GraphTraverser::new(&self.db).get_callees(node_id, max_depth)
    }

    /// Returns the impact-radius subgraph for `node_id`, delegating to
    /// `GraphTraverser::get_impact_radius` with `max_depth` passed through
    /// unchanged.
    pub fn get_impact_radius(&self, node_id: &str, max_depth: usize) -> Result<Subgraph> {
        GraphTraverser::new(&self.db).get_impact_radius(node_id, max_depth)
    }

    /// Finds graph paths from `from_node_id` to `to_node_id`, delegating to
    /// `GraphTraverser::find_paths`; `max_depth` and `max_paths` are passed
    /// through unchanged.
    pub fn find_paths(
        &self,
        from_node_id: &str,
        to_node_id: &str,
        max_depth: usize,
        max_paths: usize,
    ) -> Result<Vec<GraphPath>> {
        GraphTraverser::new(&self.db).find_paths(from_node_id, to_node_id, max_depth, max_paths)
    }

    /// Returns the dependents of `file_path` as reported by the database's
    /// `get_file_dependents`.
    pub fn get_file_dependents(&self, file_path: &str) -> Result<Vec<String>> {
        self.db.get_file_dependents(file_path)
    }

    /// Returns every file record stored in the database.
    pub fn get_all_files(&self) -> Result<Vec<FileRecord>> {
        self.db.get_all_files()
    }

    /// Lists indexed files, filtered and shaped according to `options`.
    ///
    /// A file is kept when it passes every supplied filter:
    /// * `path_filter` — checked with `file_path_matches_filter`;
    /// * `pattern` — checked with `file_pattern_matches`;
    /// * `max_depth` — clamped to `1..=20`, compared against the number of
    ///   `/`-separated path segments.
    ///
    /// Entries are sorted by path. `groups` is populated only for the grouped
    /// format and `tree` only for the tree format; otherwise they are empty.
    ///
    /// # Errors
    ///
    /// Propagates errors from reading the file records.
    pub fn list_files(&self, options: FileListOptions) -> Result<FileListReport> {
        let max_depth = options.max_depth.map(|depth| depth.clamp(1, 20));
        let include_metadata = options.include_metadata;
        let wanted_prefix = options.path_filter.as_deref();
        let wanted_pattern = options.pattern.as_deref();

        let mut files: Vec<_> = self
            .get_all_files()?
            .into_iter()
            .filter(|file| {
                wanted_prefix.map_or(true, |prefix| file_path_matches_filter(prefix, &file.path))
                    && wanted_pattern
                        .map_or(true, |pattern| file_pattern_matches(pattern, &file.path))
                    && max_depth.map_or(true, |depth| file.path.split('/').count() <= depth)
            })
            .map(|file| file_list_entry(file, include_metadata))
            .collect();
        files.sort_by(|a, b| a.path.cmp(&b.path));

        // Only the requested representation is materialised; the others stay empty.
        let (format, groups, tree) = match options.format {
            FileListFormat::Grouped => ("grouped", grouped_file_entries(&files), Vec::new()),
            FileListFormat::Flat => ("flat", Vec::new(), Vec::new()),
            FileListFormat::Tree => ("tree", Vec::new(), build_file_tree(&files)),
        };

        let total_files = files.len();
        Ok(FileListReport {
            format: format.to_string(),
            path_filter: options.path_filter,
            pattern: options.pattern,
            include_metadata,
            max_depth,
            total_files,
            files,
            groups,
            tree,
        })
    }

    /// Builds a report of the test files affected by the changed `files`.
    ///
    /// A changed file that is itself a test file (per `is_test_file`) is
    /// reported as directly affected. For any other file, tests are gathered
    /// from five sources: import dependents that are test files, MoonBit
    /// same-package tests, MoonBit package-dependent tests, the Rust name
    /// heuristic, and the Rust workspace heuristic. A warning is emitted for
    /// each changed file that matched no tests.
    ///
    /// The returned `affected_tests` list is the de-duplicated, sorted union
    /// over all changed files; `debug` holds one entry per changed file
    /// recording which source matched which tests.
    ///
    /// # Errors
    ///
    /// Propagates errors from reading file records or file dependents.
    pub fn build_affected_report(&self, files: &[String]) -> Result<AffectedReport> {
        let indexed_files = self.get_all_files()?;
        let moonbit_packages = MoonBitPackageGraph::from_root(&self.root, &indexed_files);
        // Union of affected tests across every changed file (sorted, unique).
        let mut affected = BTreeSet::new();
        let mut debug = Vec::new();
        let mut warnings = Vec::new();

        for file in files {
            // A changed test file is affected by definition; no further lookup.
            if is_test_file(file) {
                affected.insert(file.clone());
                debug.push(AffectedDebugEntry {
                    changed_file: file.clone(),
                    reason: "changed file is a test file".to_string(),
                    matched_tests: vec![file.clone()],
                    matched_by: AffectedMatchSources {
                        direct_test_input: vec![file.clone()],
                        import_dependents: Vec::new(),
                        moonbit_same_package: Vec::new(),
                        moonbit_package_dependents: Vec::new(),
                        rust_name_heuristic: Vec::new(),
                        rust_workspace_heuristic: Vec::new(),
                    },
                });
                continue;
            }

            // `matched` is the per-file union; each source also keeps its own set
            // so the debug entry can attribute matches.
            let mut matched = BTreeSet::new();
            let mut import_dependents = BTreeSet::new();
            for dep in self.get_file_dependents(file)? {
                // Only dependents that are test files count.
                if is_test_file(&dep) {
                    import_dependents.insert(dep.clone());
                    matched.insert(dep.clone());
                    affected.insert(dep);
                }
            }

            let moonbit_tests: BTreeSet<String> = moonbit_same_package_tests(file, &indexed_files)
                .into_iter()
                .collect();
            for test in &moonbit_tests {
                matched.insert(test.clone());
                affected.insert(test.clone());
            }
            let moonbit_package_tests: BTreeSet<String> = moonbit_packages
                .dependent_package_tests(file)
                .into_iter()
                .collect();
            for test in &moonbit_package_tests {
                matched.insert(test.clone());
                affected.insert(test.clone());
            }
            let rust_tests: BTreeSet<String> = rust_name_heuristic_tests(file, &indexed_files)
                .into_iter()
                .collect();
            for test in &rust_tests {
                matched.insert(test.clone());
                affected.insert(test.clone());
            }
            let rust_workspace_tests: BTreeSet<String> =
                rust_workspace_heuristic_tests(&self.root, file, &indexed_files)
                    .into_iter()
                    .collect();
            for test in &rust_workspace_tests {
                matched.insert(test.clone());
                affected.insert(test.clone());
            }

            if matched.is_empty() {
                warnings.push(format!(
                    "{file}: no import-dependent tests, MoonBit same-package tests, MoonBit package-dependent tests, Rust name-heuristic tests, or Rust workspace tests found"
                ));
            }
            debug.push(AffectedDebugEntry {
                changed_file: file.clone(),
                reason: if matched.is_empty() {
                    "no import-dependent tests, MoonBit same-package tests, MoonBit package-dependent tests, Rust name-heuristic tests, or Rust workspace tests found".to_string()
                } else {
                    "matched import-dependent tests, MoonBit same-package tests, MoonBit package-dependent tests, Rust name-heuristic tests, and/or Rust workspace tests".to_string()
                },
                matched_tests: matched.into_iter().collect(),
                matched_by: AffectedMatchSources {
                    direct_test_input: Vec::new(),
                    import_dependents: import_dependents.into_iter().collect(),
                    moonbit_same_package: moonbit_tests.into_iter().collect(),
                    moonbit_package_dependents: moonbit_package_tests.into_iter().collect(),
                    rust_name_heuristic: rust_tests.into_iter().collect(),
                    rust_workspace_heuristic: rust_workspace_tests.into_iter().collect(),
                },
            });
        }

        Ok(AffectedReport {
            changed_files: files.to_vec(),
            affected_tests: affected.into_iter().collect(),
            debug,
            warnings,
        })
    }

    pub fn build_context(&self, task: &str, max_nodes: i64, include_code: bool) -> Result<String> {
        let report = self.build_context_report(task, max_nodes, include_code)?;
        let mut out = format!("## Context: {task}\n\n");
        if report.matches.is_empty() {
            for warning in &report.warnings {
                out.push_str(warning);
                out.push('\n');
            }
            return Ok(out);
        }

        for result in report.matches {
            let n = result.node;
            out.push_str(&format!(
                "- `{}` `{}` at `{}:{}`",
                n.kind, n.name, n.file_path, n.start_line
            ));
            if let Some(sig) = n.signature.as_deref() {
                out.push_str(&format!(" — `{}`", sig.replace('\n', " ")));
            }
            out.push('\n');
            if let Some(code) = result.code {
                out.push_str("\n```");
                out.push_str(n.language.as_str());
                out.push('\n');
                out.push_str(&code);
                if !code.ends_with('\n') {
                    out.push('\n');
                }
                out.push_str("```\n\n");
            }
        }
        Ok(out)
    }

    pub fn build_context_report(
        &self,
        task: &str,
        max_nodes: i64,
        include_code: bool,
    ) -> Result<ContextReport> {
        let query = task.trim().to_string();
        let search_terms = context_search_terms(task);
        let results = self.find_context_nodes(&search_terms, max_nodes)?;
        let mut matches = Vec::new();
        let mut files: BTreeMap<String, ContextFileSummary> = BTreeMap::new();
        let mut symbols = Vec::new();

        for (result, search_term) in results {
            let code = if include_code {
                self.read_node_source(&result.node).ok()
            } else {
                None
            };
            let file = files
                .entry(result.node.file_path.clone())
                .or_insert_with(|| ContextFileSummary {
                    path: result.node.file_path.clone(),
                    language: result.node.language,
                    match_count: 0,
                    symbols: Vec::new(),
                });
            file.match_count += 1;
            if !file.symbols.iter().any(|name| name == &result.node.name) {
                file.symbols.push(result.node.name.clone());
            }
            symbols.push(ContextSymbolSummary {
                name: result.node.name.clone(),
                kind: result.node.kind,
                file_path: result.node.file_path.clone(),
                start_line: result.node.start_line,
            });
            matches.push(ContextMatch {
                reason: context_match_reason(task, &search_term),
                search_term,
                score: result.score,
                node: result.node,
                code,
            });
        }

        let mut warnings = Vec::new();
        if matches.is_empty() {
            warnings.push("No matching symbols or files were found.".to_string());
            warnings.push(
                "Try a concrete symbol name, file name, package/module name, or a shorter code term. For candidate discovery, run `cgz query --json <term>`."
                    .to_string(),
            );
        }

        Ok(ContextReport {
            query,
            search_terms,
            matches,
            files: files.into_values().collect(),
            symbols,
            warnings,
        })
    }

    /// Builds an exploration report for `query`: bounded source sections
    /// grouped by file, relationships of the matched nodes, and a list of
    /// additional related files.
    ///
    /// Budgets applied here:
    /// * `max_files` is clamped to `1..=20`;
    /// * the node budget is `max_files * 6`, clamped to `6..=120`;
    /// * at most 4 sections are kept per file, each bounded to 4,000
    ///   characters;
    /// * at most `max_files` source files, `max_files` additional files, and
    ///   `max_files * 4` relationships are returned.
    ///
    /// When any source section, source file, or relationship is cut, the
    /// report is marked `truncated` and a warning is appended.
    ///
    /// # Errors
    ///
    /// Propagates errors from stats, context building, and relationship
    /// collection.
    pub fn build_explore_report(&self, query: &str, max_files: usize) -> Result<ExploreReport> {
        let max_files = max_files.clamp(1, 20);
        let stats = self.stats()?;
        let max_nodes = (max_files as i64 * 6).clamp(6, 120);
        // Source code is always requested for exploration.
        let context = self.build_context_report(query, max_nodes, true)?;
        let mut source_files: BTreeMap<String, ExploreSourceFile> = BTreeMap::new();
        let mut relationships = Vec::new();
        let mut additional_files = BTreeSet::new();
        let mut seen_relationships = BTreeSet::new();
        let mut truncated = false;
        let mut warnings = context.warnings.clone();

        for matched in &context.matches {
            let file = source_files
                .entry(matched.node.file_path.clone())
                .or_insert_with(|| ExploreSourceFile {
                    path: matched.node.file_path.clone(),
                    language: matched.node.language,
                    sections: Vec::new(),
                });
            if file.sections.len() < 4 {
                // A match without captured code contributes an empty section.
                let (code, section_truncated) =
                    bounded_source_section(matched.code.as_deref().unwrap_or_default(), 4_000);
                truncated |= section_truncated;
                file.sections.push(ExploreSourceSection {
                    symbol: matched.node.name.clone(),
                    kind: matched.node.kind,
                    start_line: matched.node.start_line,
                    end_line: matched.node.end_line,
                    reason: matched.reason.clone(),
                    code,
                    truncated: section_truncated,
                });
            } else {
                // Per-file section budget exhausted; the match is dropped.
                truncated = true;
            }

            // Relationships are collected for every match, including ones
            // whose section was dropped above.
            self.collect_explore_relationships(
                &matched.node,
                &mut relationships,
                &mut seen_relationships,
                &mut additional_files,
            )?;
        }

        let mut source_files = source_files.into_values().collect::<Vec<_>>();
        source_files.sort_by(|a, b| a.path.cmp(&b.path));
        if source_files.len() > max_files {
            // Files over budget are demoted to `additional_files` by path order.
            for file in source_files.drain(max_files..) {
                additional_files.insert(file.path);
            }
            truncated = true;
        }

        let source_paths = source_files
            .iter()
            .map(|file| file.path.as_str())
            .collect::<BTreeSet<_>>();
        // Never list a file as "additional" when its source is already shown.
        let additional_files = additional_files
            .into_iter()
            .filter(|file| !source_paths.contains(file.as_str()))
            .take(max_files)
            .collect::<Vec<_>>();

        // Sort so the relationship cut below drops the same entries every run.
        relationships.sort_by(|a, b| {
            a.file_path
                .cmp(&b.file_path)
                .then_with(|| a.source.cmp(&b.source))
                .then_with(|| a.kind.as_str().cmp(b.kind.as_str()))
                .then_with(|| a.target.cmp(&b.target))
        });
        if relationships.len() > max_files * 4 {
            relationships.truncate(max_files * 4);
            truncated = true;
        }

        if truncated {
            warnings.push("Explore output was truncated to fit the configured source and relationship budgets.".to_string());
        }

        Ok(ExploreReport {
            query: context.query,
            max_files,
            budget_guidance: explore_budget_guidance(stats.file_count),
            source_files,
            relationships,
            additional_files,
            warnings,
            truncated,
            truncated_reason: truncated.then(|| {
                "Some source sections, files, or relationships exceeded the explore budget."
                    .to_string()
            }),
        })
    }

    /// Collects the direct relationships of `node` into the explore accumulators.
    ///
    /// Looks at the first four depth-1 callee edges ("outgoing") and the first
    /// four depth-1 caller edges ("incoming"), skipping `Contains` edges, and
    /// records each through `push_explore_relationship`. Up to four dependents
    /// of the node's file are also added to `additional_files`.
    ///
    /// # Errors
    ///
    /// Propagates traversal and database errors.
    fn collect_explore_relationships(
        &self,
        node: &Node,
        relationships: &mut Vec<ExploreRelationship>,
        seen: &mut BTreeSet<String>,
        additional_files: &mut BTreeSet<String>,
    ) -> Result<()> {
        // The cap of four is applied before `Contains` edges are filtered out.
        let outgoing = self.get_callees(&node.id, 1)?;
        for edge in outgoing
            .into_iter()
            .take(4)
            .filter(|candidate| candidate.edge.kind != EdgeKind::Contains)
        {
            push_explore_relationship(node, edge, "outgoing", relationships, seen, additional_files);
        }

        let incoming = self.get_callers(&node.id, 1)?;
        for edge in incoming
            .into_iter()
            .take(4)
            .filter(|candidate| candidate.edge.kind != EdgeKind::Contains)
        {
            push_explore_relationship(node, edge, "incoming", relationships, seen, additional_files);
        }

        let dependents = self.get_file_dependents(&node.file_path)?;
        additional_files.extend(dependents.into_iter().take(4));
        Ok(())
    }

    /// Resolves `search_terms` to distinct nodes, in term order.
    ///
    /// Each term is searched with a limit equal to the remaining budget; a
    /// node already returned for an earlier term is not repeated. At most
    /// `max(max_nodes, 1)` results are returned, each paired with the term
    /// that produced it.
    ///
    /// # Errors
    ///
    /// Propagates search errors.
    fn find_context_nodes(
        &self,
        search_terms: &[String],
        max_nodes: i64,
    ) -> Result<Vec<(SearchResult, String)>> {
        let limit = max_nodes.max(1);
        let capacity = limit as usize;
        let mut collected = Vec::new();
        let mut seen_ids = BTreeSet::new();

        for term in search_terms {
            if collected.len() >= capacity {
                break;
            }
            let options = SearchOptions {
                limit: limit - collected.len() as i64,
                ..Default::default()
            };
            for hit in self.search_nodes(term, options)? {
                // Skip nodes already reported for an earlier term.
                if !seen_ids.insert(hit.node.id.clone()) {
                    continue;
                }
                collected.push((hit, term.clone()));
                if collected.len() >= capacity {
                    break;
                }
            }
        }

        Ok(collected)
    }

    /// Reads the source lines covered by `node` from its file under the
    /// project root.
    ///
    /// Lines `start_line..=end_line` (1-based) are returned joined with `\n`.
    /// An `end_line` smaller than `start_line` is treated as `start_line`.
    /// A range that extends past the end of the file, or that is otherwise
    /// invalid, yields a shorter or empty snippet instead of panicking.
    ///
    /// # Errors
    ///
    /// Fails when the file cannot be read as UTF-8 text.
    pub fn read_node_source(&self, node: &Node) -> Result<String> {
        let full = self.root.join(&node.file_path);
        let text =
            fs::read_to_string(&full).with_context(|| format!("reading {}", full.display()))?;
        // Convert the 1-based inclusive range into a 0-based half-open one.
        let first = node.start_line.saturating_sub(1) as usize;
        let last = node.end_line.max(node.start_line) as usize;
        // `skip`/`take` clamp to the file length on their own, and the
        // saturating length avoids the slice-index panic on inverted ranges.
        let snippet: Vec<&str> = text
            .lines()
            .skip(first)
            .take(last.saturating_sub(first))
            .collect();
        Ok(snippet.join("\n"))
    }

    /// Consumes the graph. The body is empty, so the fields are simply dropped
    /// when `self` goes out of scope.
    pub fn close(self) {}

    /// Reads `path` (relative to the project root) and indexes its content.
    ///
    /// A read failure is recorded in `result` as a per-file error and is not
    /// propagated, so the caller can continue with other files.
    ///
    /// # Errors
    ///
    /// Propagates errors from `index_changed_file_with_content`.
    fn index_changed_file(&self, path: &Path, result: &mut IndexResult) -> Result<()> {
        match fs::read_to_string(self.root.join(path)) {
            Ok(content) => self.index_changed_file_with_content(path, content, None, result),
            Err(err) => {
                push_index_error(result, categorize_read_error(&err), path, err.to_string());
                Ok(())
            }
        }
    }

    fn index_changed_file_with_content(
        &self,
        path: &Path,
        content: String,
        hash: Option<String>,
        result: &mut IndexResult,
    ) -> Result<()> {
        let path_key = normalized_path(path);
        let lang = detect_language(path, &content);
        if lang.is_unknown() {
            self.db.delete_file_index(&path_key)?;
            push_index_error(
                result,
                IndexErrorCategory::Unsupported,
                path,
                "unsupported file type".to_string(),
            );
            return Ok(());
        }
        if detect_parse_error(&content, lang) {
            self.db.delete_file_index(&path_key)?;
            push_index_error(
                result,
                IndexErrorCategory::Parse,
                path,
                format!("could not parse {lang} syntax"),
            );
            return Ok(());
        }
        let full = self.root.join(path);
        let metadata = fs::metadata(&full)?;
        let extraction = extract_from_source(path, &content, lang);
        let file = FileRecord {
            path: path_key,
            content_hash: hash.unwrap_or_else(|| content_hash(&content)),
            language: lang,
            size: metadata.len(),
            modified_at: metadata
                .modified()
                .ok()
                .and_then(system_time_ms)
                .unwrap_or_default(),
            indexed_at: now_ms(),
            node_count: extraction.nodes.len() as i64,
        };
        self.db.replace_file_index(
            &file,
            &extraction.nodes,
            &extraction.edges,
            &extraction.unresolved_references,
        )?;
        result.files_indexed += 1;
        result.nodes_created += extraction.nodes.len() as i64;
        result.edges_created += extraction.edges.len() as i64;
        Ok(())
    }

    /// Walks the project root and returns the sorted, root-relative paths of
    /// all files to index.
    ///
    /// The walk uses `ignore::WalkBuilder` with git ignore rules (local,
    /// global, and exclude) enabled and the hidden-file filter turned off.
    /// Anything with a `.codegraph` path component is skipped, and the
    /// remaining files must pass `should_include_file` for the current
    /// configuration.
    ///
    /// # Errors
    ///
    /// Fails on the first error reported by the directory walker.
    fn scan_files(&self) -> Result<Vec<PathBuf>> {
        let walker = ignore::WalkBuilder::new(&self.root)
            .hidden(false)
            .git_ignore(true)
            .git_global(true)
            .git_exclude(true)
            .build();

        let mut found = Vec::new();
        for entry in walker {
            let entry = entry?;
            let is_file = entry.file_type().is_some_and(|kind| kind.is_file());
            if !is_file {
                continue;
            }

            let absolute = entry.path();
            // Keep the walker's path as-is if it is not under the root.
            let rel = absolute
                .strip_prefix(&self.root)
                .unwrap_or(absolute)
                .to_path_buf();

            let inside_codegraph = rel
                .components()
                .any(|part| part.as_os_str() == CODEGRAPH_DIR);
            if !inside_codegraph && should_include_file(&rel, &self.config) {
                found.push(rel);
            }
        }

        found.sort();
        Ok(found)
    }
}

/// Maps an I/O error from reading a file to an index error category:
/// `WouldBlock` becomes `Lock`, everything else becomes `Read`.
fn categorize_read_error(err: &std::io::Error) -> IndexErrorCategory {
    match err.kind() {
        std::io::ErrorKind::WouldBlock => IndexErrorCategory::Lock,
        _ => IndexErrorCategory::Read,
    }
}

/// Records a per-file failure in `result`: appends an [`IndexError`] for
/// `path` and bumps the `files_errored` counter.
fn push_index_error(
    result: &mut IndexResult,
    category: IndexErrorCategory,
    path: &Path,
    message: String,
) {
    let entry = IndexError {
        category,
        path: path.display().to_string(),
        message,
    };
    result.errors.push(entry);
    result.files_errored += 1;
}

fn file_list_entry(file: FileRecord, include_metadata: bool) -> FileListEntry {
    FileListEntry {
        path: file.path,
        language: file.language,
        node_count: file.node_count,
        size: include_metadata.then_some(file.size),
        modified_at: include_metadata.then_some(file.modified_at),
        indexed_at: include_metadata.then_some(file.indexed_at),
    }
}

/// Renders `path` as a string (lossily for non-UTF-8 data) with every
/// backslash replaced by a forward slash.
fn normalized_path(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}

/// Returns true when `path` is selected by the directory/file `filter`.
///
/// Leading and trailing slashes on `filter` are ignored. An empty filter
/// selects everything; otherwise `path` must equal the filter or sit below
/// it (`<filter>/...`). A plain string prefix does not count: `src` does not
/// select `srcfoo/a.rs`.
fn file_path_matches_filter(filter: &str, path: &str) -> bool {
    let prefix = filter.trim_matches('/');
    if prefix.is_empty() {
        return true;
    }
    match path.strip_prefix(prefix) {
        // Either an exact match or the prefix ends on a path-segment boundary.
        Some(rest) => rest.is_empty() || rest.starts_with('/'),
        None => false,
    }
}

/// Groups file entries by language.
///
/// Groups are ordered by the language's string form; within a group, entries
/// keep the order they have in `files`. Each group records its entry count.
fn grouped_file_entries(files: &[FileListEntry]) -> Vec<FileLanguageGroup> {
    let mut by_language: BTreeMap<String, (Language, Vec<FileListEntry>)> = BTreeMap::new();
    for entry in files {
        let key = entry.language.as_str().to_string();
        let (_, bucket) = by_language
            .entry(key)
            .or_insert_with(|| (entry.language, Vec::new()));
        bucket.push(entry.clone());
    }

    let mut groups = Vec::with_capacity(by_language.len());
    for (language, files) in by_language.into_values() {
        groups.push(FileLanguageGroup {
            language,
            count: files.len(),
            files,
        });
    }
    groups
}

/// Builds a directory tree from flat file entries by inserting each entry
/// along its `/`-separated path segments.
fn build_file_tree(files: &[FileListEntry]) -> Vec<FileTreeEntry> {
    let mut roots = Vec::new();
    for file in files {
        let segments: Vec<&str> = file.path.split('/').collect();
        insert_tree_file(&mut roots, file, &segments, 0, "");
    }
    roots
}

/// Inserts `file` into the tree level `entries`, handling path segment
/// `parts[index]` and recursing into child levels for the remaining segments.
///
/// * `parts` — the file's path split on `/`.
/// * `index` — which segment this call handles; nothing happens when it is
///   out of range.
/// * `parent` — slash-joined path of the enclosing directory, empty at the
///   top level.
///
/// Intermediate segments become `"dir"` entries and the last segment becomes
/// a `"file"` entry carrying the file's language, node count, and size. An
/// existing entry with the same name and kind is reused. After each insertion
/// the level is re-sorted by kind, then name, then path; because kinds are
/// compared as strings, `"dir"` entries sort before `"file"` entries.
fn insert_tree_file(
    entries: &mut Vec<FileTreeEntry>,
    file: &FileListEntry,
    parts: &[&str],
    index: usize,
    parent: &str,
) {
    let Some(name) = parts.get(index) else {
        return;
    };
    let path = if parent.is_empty() {
        (*name).to_string()
    } else {
        format!("{parent}/{name}")
    };
    // Only the final segment is the file itself.
    let is_file = index + 1 == parts.len();
    // Reuse an entry with the same name and kind, or append a new one.
    let pos = entries
        .iter()
        .position(|entry| entry.name == *name && entry.kind == if is_file { "file" } else { "dir" })
        .unwrap_or_else(|| {
            entries.push(FileTreeEntry {
                name: (*name).to_string(),
                path: path.clone(),
                kind: if is_file { "file" } else { "dir" }.to_string(),
                language: is_file.then_some(file.language),
                node_count: is_file.then_some(file.node_count),
                size: file.size.filter(|_| is_file),
                children: Vec::new(),
            });
            entries.len() - 1
        });
    if !is_file {
        insert_tree_file(&mut entries[pos].children, file, parts, index + 1, &path);
    }
    // Sorting happens after the recursion, so `pos` is still valid above.
    entries.sort_by(|a, b| {
        a.kind
            .cmp(&b.kind)
            .then_with(|| a.name.cmp(&b.name))
            .then_with(|| a.path.cmp(&b.path))
    });
}

/// Returns true when `path` matches the simple file `pattern`.
///
/// Supported forms:
/// * empty pattern — matches everything;
/// * `*.ext` or `**/*.ext` with a wildcard-free extension — matches any path
///   ending in `.ext`;
/// * any other pattern containing `*` — each `*` matches any run of
///   characters (including `/`); the text before the first `*` must be a
///   prefix of `path` and the text after the last `*` must be a suffix;
/// * a pattern without `*` — plain substring match.
///
/// Extension patterns that contain further wildcards (for example
/// `*.test.*`) go through the general wildcard matching rather than being
/// compared literally; for the `**/` form the leading `**/` is dropped first
/// so root-level files keep matching, as they do on the fast path.
fn file_pattern_matches(pattern: &str, path: &str) -> bool {
    if pattern.is_empty() {
        return true;
    }

    let extension = pattern
        .strip_prefix("*.")
        .or_else(|| pattern.strip_prefix("**/*."));
    let glob = match extension {
        // Fast path: a literal extension only needs a suffix check.
        Some(ext) if !ext.contains('*') => return path.ends_with(&format!(".{ext}")),
        Some(_) => pattern.strip_prefix("**/").unwrap_or(pattern),
        None => pattern,
    };

    if !glob.contains('*') {
        return path.contains(glob);
    }

    // Match the literal pieces between wildcards left to right.
    let parts: Vec<&str> = glob.split('*').collect();
    let mut rest = path;
    for (idx, &part) in parts.iter().enumerate() {
        if part.is_empty() {
            continue;
        }
        // Text before the first `*` is anchored to the start of the path.
        if idx == 0 && !rest.starts_with(part) {
            return false;
        }
        let Some(found) = rest.find(part) else {
            return false;
        };
        rest = &rest[found + part.len()..];
    }
    // Text after the last `*` is anchored to the end of the path.
    glob.ends_with('*') || parts.last().is_some_and(|suffix| path.ends_with(*suffix))
}

/// Limits `source` to at most `max_chars` characters.
///
/// Returns the text unchanged with `false` when it already fits. Otherwise
/// returns the first `max_chars` characters followed by a
/// `// [section truncated]` marker line, with `true`.
fn bounded_source_section(source: &str, max_chars: usize) -> (String, bool) {
    // The character at position `max_chars` exists only when the text is too
    // long; its byte offset is where the cut has to happen.
    match source.char_indices().nth(max_chars) {
        None => (source.to_string(), false),
        Some((cut, _)) => {
            let mut clipped = source[..cut].to_string();
            clipped.push_str("\n// [section truncated]");
            (clipped, true)
        }
    }
}

fn push_explore_relationship(
    root: &Node,
    edge: NodeEdge,
    direction: &str,
    relationships: &mut Vec<ExploreRelationship>,
    seen: &mut BTreeSet<String>,
    additional_files: &mut BTreeSet<String>,
) {
    let key = format!(
        "{}:{}:{}",
        edge.edge.source,
        edge.edge.kind.as_str(),
        edge.edge.target
    );
    if !seen.insert(key) {
        return;
    }
    if edge.node.file_path != root.file_path {
        additional_files.insert(edge.node.file_path.clone());
    }
    let (source, target) = if direction == "outgoing" {
        (root.name.clone(), edge.node.name.clone())
    } else {
        (edge.node.name.clone(), root.name.clone())
    };
    relationships.push(ExploreRelationship {
        source,
        target,
        kind: edge.edge.kind,
        file_path: edge.node.file_path,
        direction: direction.to_string(),
    });
}

/// Returns a usage hint for explore calls based on the number of indexed files.
///
/// `0..=50` files is treated as a small project, `51..=250` as medium, and
/// every other value (including negative counts) as large.
fn explore_budget_guidance(file_count: i64) -> String {
    let guidance = if (0..=50).contains(&file_count) {
        "Small project: one or two focused explore calls should usually be enough."
    } else if (51..=250).contains(&file_count) {
        "Medium project: use a few targeted explore calls around concrete symbols or files."
    } else {
        "Large project: keep explore calls narrow and follow up by file, symbol, or subsystem."
    };
    guidance.to_string()
}

/// Derives the ordered, case-insensitively de-duplicated search terms for a task.
///
/// The trimmed task text always comes first (when non-empty). It is followed by
/// every token of the task that `is_useful_context_term` accepts, in order of
/// first appearance.
fn context_search_terms(task: &str) -> Vec<String> {
    /// Characters that may appear inside a token while splitting.
    fn is_token_char(c: char) -> bool {
        c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '/' | '.' | ':')
    }

    /// Characters allowed at either end of a token; note that `-` is not one of them.
    fn is_edge_char(c: char) -> bool {
        c.is_ascii_alphanumeric() || matches!(c, '_' | '/' | '.' | ':')
    }

    let mut terms = Vec::new();
    let mut seen = BTreeSet::new();
    push_context_term(task.trim(), &mut terms, &mut seen);

    task.split(|c: char| !is_token_char(c))
        .map(|raw| raw.trim_matches(|c: char| !is_edge_char(c)))
        .filter(|term| is_useful_context_term(term))
        .for_each(|term| push_context_term(term, &mut terms, &mut seen));

    terms
}

/// Explains why `search_term` produced a context match for `task`.
///
/// The comparison against the trimmed task is ASCII case-insensitive; anything
/// other than the whole task is reported as an extracted term.
fn context_match_reason(task: &str, search_term: &str) -> String {
    if !task.trim().eq_ignore_ascii_case(search_term) {
        return format!("matched extracted task term `{search_term}`");
    }
    String::from("matched the full context query")
}

/// Appends `term` to `terms` unless it is empty or was already added.
///
/// `seen` holds the ASCII-lowercased form of every accepted term, so the
/// duplicate check ignores ASCII case while `terms` keeps the original spelling.
fn push_context_term(term: &str, terms: &mut Vec<String>, seen: &mut BTreeSet<String>) {
    let is_new = !term.is_empty() && seen.insert(term.to_ascii_lowercase());
    if is_new {
        terms.push(term.to_owned());
    }
}

/// Decides whether a token from a task description is worth searching for.
///
/// Tokens shorter than three bytes and tokens listed in `CONTEXT_STOP_WORDS`
/// (compared in ASCII lowercase) are rejected. A remaining token is accepted
/// when it is at least five bytes long or looks like an identifier or path:
/// it contains `_`, `/`, `.`, `:`, an ASCII digit, or an ASCII uppercase letter.
fn is_useful_context_term(term: &str) -> bool {
    if term.len() < 3 {
        return false;
    }
    let lowered = term.to_ascii_lowercase();
    if CONTEXT_STOP_WORDS.iter().any(|stop| *stop == lowered) {
        return false;
    }
    term.len() >= 5
        || term.chars().any(|c| {
            matches!(c, '_' | '/' | '.' | ':') || c.is_ascii_digit() || c.is_ascii_uppercase()
        })
}

/// Generic task words that `is_useful_context_term` never treats as a search term.
///
/// Candidates are ASCII-lowercased before the lookup, so every entry here must
/// be lowercase to be effective.
const CONTEXT_STOP_WORDS: &[&str] = &[
    "about",
    "after",
    "before",
    "build",
    "change",
    "check",
    "code",
    "context",
    "debug",
    "error",
    "feature",
    "files",
    "fix",
    "from",
    "handle",
    "how",
    "implement",
    "implemented",
    "invalid",
    "is",
    "issue",
    "order",
    "query",
    "return",
    "should",
    "task",
    "test",
    "tests",
    "update",
    "valid",
    "validation",
    "what",
    "when",
    "where",
    "which",
    "who",
    "why",
    "with",
];

/// Reports whether `root` holds an initialized graph.
///
/// This is true when the database file `DATABASE_FILE` exists inside the
/// `CODEGRAPH_DIR` directory directly under `root`.
pub fn is_initialized(root: impl AsRef<Path>) -> bool {
    let mut database_path = root.as_ref().to_path_buf();
    database_path.push(CODEGRAPH_DIR);
    database_path.push(DATABASE_FILE);
    database_path.exists()
}

/// Walks upward from `start` to the closest directory that is initialized.
///
/// `start` is canonicalized when possible and used verbatim otherwise. When it
/// names a file, the search begins in the containing directory. Returns `None`
/// once the walk runs out of parent directories without finding one for which
/// [`is_initialized`] is true.
pub fn find_nearest_codegraph_root(start: impl AsRef<Path>) -> Option<PathBuf> {
    let start = start.as_ref();
    let mut candidate = match start.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => start.to_path_buf(),
    };
    if candidate.is_file() {
        candidate.pop();
    }
    while !is_initialized(&candidate) {
        // `pop` reports false when there is no parent left to try.
        if !candidate.pop() {
            return None;
        }
    }
    Some(candidate)
}

/// Returns the SHA-256 digest of `content`'s UTF-8 bytes as lowercase hex.
fn content_hash(content: &str) -> String {
    let digest = Sha256::digest(content.as_bytes());
    format!("{:x}", digest)
}

/// Current wall-clock time in milliseconds since the Unix epoch.
///
/// Falls back to `0` when `system_time_ms` cannot express the current time.
fn now_ms() -> i64 {
    let now = std::time::SystemTime::now();
    system_time_ms(now).unwrap_or(0)
}

/// Converts `t` to whole milliseconds since the Unix epoch.
///
/// Returns `None` when `t` lies before the epoch, or when the millisecond
/// count does not fit in an `i64`. The conversion is checked rather than
/// truncated with `as`, so an out-of-range value can never wrap into a
/// bogus timestamp.
fn system_time_ms(t: std::time::SystemTime) -> Option<i64> {
    let since_epoch = t.duration_since(std::time::UNIX_EPOCH).ok()?;
    // `as_millis` yields a `u128`; narrow it losslessly or give up.
    i64::try_from(since_epoch.as_millis()).ok()
}

/// Path-based guess at whether `file` is a test file.
///
/// A path counts as a test when any of these hold:
/// * it ends with `.mbt.md`;
/// * its file name ends with `_test.mbt` or `_wbtest.mbt`;
/// * it contains one of the directory markers `/__tests__/`, `/test/`,
///   `/tests/`, `/e2e/`, `/spec/`, or one of the name markers `.test.`, `.spec.`.
///
/// The directory markers include the leading slash, so a path that merely
/// starts with `tests/` is not matched by them.
fn is_test_file(file: &str) -> bool {
    const NAME_SUFFIXES: [&str; 2] = ["_test.mbt", "_wbtest.mbt"];
    const PATH_MARKERS: [&str; 7] = [
        "/__tests__/",
        "/test/",
        "/tests/",
        "/e2e/",
        "/spec/",
        ".test.",
        ".spec.",
    ];

    if file.ends_with(".mbt.md") {
        return true;
    }
    let basename = match file.rfind('/') {
        Some(slash) => &file[slash + 1..],
        None => file,
    };
    NAME_SUFFIXES.iter().any(|suffix| basename.ends_with(*suffix))
        || PATH_MARKERS.iter().any(|marker| file.contains(*marker))
}

/// Lists the MoonBit test files that live in the same package as `file`.
///
/// Returns nothing when `file` is itself a test, is not a MoonBit source file,
/// or has no enclosing package directory among `indexed_files`. The result
/// keeps the order of `indexed_files`.
fn moonbit_same_package_tests(file: &str, indexed_files: &[FileRecord]) -> Vec<String> {
    let is_plain_source = is_moonbit_source_file(file) && !is_test_file(file);
    if !is_plain_source {
        return Vec::new();
    }
    let Some(package_dir) = moonbit_package_dir(file, indexed_files) else {
        return Vec::new();
    };

    let mut tests = Vec::new();
    for record in indexed_files {
        if record.language != Language::MoonBit || !is_test_file(&record.path) {
            continue;
        }
        // A test belongs to the package whose directory is its nearest enclosing one.
        let owner = moonbit_package_dir(&record.path, indexed_files);
        if owner.as_deref() == Some(package_dir.as_str()) {
            tests.push(record.path.clone());
        }
    }
    tests
}

/// Import graph of the MoonBit packages found among the indexed files.
#[derive(Debug, Default)]
struct MoonBitPackageGraph {
    /// Packages keyed by the directory that contains their package manifest.
    package_by_dir: BTreeMap<String, MoonBitPackage>,
    /// For each imported local package name, the names of the local packages importing it.
    reverse_imports: BTreeMap<String, BTreeSet<String>>,
}

/// One MoonBit package as described by its package manifest.
#[derive(Debug)]
struct MoonBitPackage {
    /// Name from the manifest's `name` field, or one derived from the module name and directory.
    name: String,
    /// Import entries collected from the manifest's `import` / `imports` field.
    imports: Vec<String>,
    /// Paths of the indexed MoonBit test files whose nearest package directory is this package.
    tests: Vec<String>,
}

impl MoonBitPackageGraph {
    /// Builds the package graph for the project at `root`.
    ///
    /// Every indexed package manifest becomes a package keyed by its directory.
    /// A manifest that cannot be read is treated as empty, and a package
    /// without a declared name gets one derived from the module name and its
    /// directory. MoonBit test files are then attached to their nearest
    /// enclosing package. Finally the imports are inverted, keeping only
    /// imports that name a local package.
    fn from_root(root: &Path, indexed_files: &[FileRecord]) -> Self {
        let module_name = moonbit_module_name(root, indexed_files);

        // Pass 1: one package per manifest.
        let mut package_by_dir: BTreeMap<String, MoonBitPackage> = BTreeMap::new();
        for manifest in indexed_files
            .iter()
            .filter(|record| is_moonbit_package_file(&record.path))
        {
            let dir = parent_dir(&manifest.path);
            let text = fs::read_to_string(root.join(&manifest.path)).unwrap_or_default();
            let (declared_name, imports) = parse_moonbit_package_metadata(&text);
            let name = match declared_name {
                Some(name) => name,
                None => moonbit_package_name_from_dir(module_name.as_deref(), &dir),
            };
            package_by_dir.insert(
                dir,
                MoonBitPackage {
                    name,
                    imports,
                    tests: Vec::new(),
                },
            );
        }

        // Pass 2: hang each test file on the package that owns its directory.
        let known_dirs: Vec<String> = package_by_dir.keys().cloned().collect();
        for test_file in indexed_files
            .iter()
            .filter(|record| record.language == Language::MoonBit && is_test_file(&record.path))
        {
            let Some(owner_dir) = moonbit_package_dir_from_dirs(&test_file.path, &known_dirs)
            else {
                continue;
            };
            if let Some(owner) = package_by_dir.get_mut(&owner_dir) {
                owner.tests.push(test_file.path.clone());
            }
        }

        // Pass 3: invert imports, ignoring anything that is not a local package.
        let local_names: BTreeSet<String> = package_by_dir
            .values()
            .map(|package| package.name.clone())
            .collect();
        let mut reverse_imports: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
        for importer in package_by_dir.values() {
            for imported in importer
                .imports
                .iter()
                .filter(|name| local_names.contains(*name))
            {
                reverse_imports
                    .entry(imported.clone())
                    .or_default()
                    .insert(importer.name.clone());
            }
        }

        Self {
            package_by_dir,
            reverse_imports,
        }
    }

    /// Collects the tests of every local package that depends on `file`'s package.
    ///
    /// Dependents are found transitively through the reverse import map. The
    /// result is empty when `file` is a test, is not a MoonBit source file, or
    /// belongs to no known package. Tests are returned grouped by package, in
    /// package-directory order.
    fn dependent_package_tests(&self, file: &str) -> Vec<String> {
        if is_test_file(file) || !is_moonbit_source_file(file) {
            return Vec::new();
        }
        let Some(changed_package) = self.package_for_file(file) else {
            return Vec::new();
        };

        // Worklist traversal over "is imported by" edges; `dependents` doubles
        // as the visited set so import cycles terminate.
        let mut worklist: Vec<&String> = self
            .reverse_imports
            .get(&changed_package.name)
            .into_iter()
            .flatten()
            .collect();
        let mut dependents: BTreeSet<&String> = BTreeSet::new();
        while let Some(name) = worklist.pop() {
            if dependents.insert(name) {
                if let Some(importers) = self.reverse_imports.get(name) {
                    worklist.extend(importers);
                }
            }
        }

        self.package_by_dir
            .values()
            .filter(|package| dependents.contains(&package.name))
            .flat_map(|package| package.tests.iter().cloned())
            .collect()
    }

    /// Finds the package whose directory is the nearest one enclosing `file`.
    fn package_for_file(&self, file: &str) -> Option<&MoonBitPackage> {
        moonbit_package_dir_from_dirs(file, self.package_by_dir.keys())
            .and_then(|dir| self.package_by_dir.get(&dir))
    }
}

/// Reads the module name from the shallowest indexed `moon.mod.json`.
///
/// "Shallowest" means the path with the fewest `/` separators. Returns `None`
/// when no such file is indexed, when it cannot be read or parsed as JSON, or
/// when it has no string `name` field.
fn moonbit_module_name(root: &Path, indexed_files: &[FileRecord]) -> Option<String> {
    let manifest = indexed_files
        .iter()
        .filter(|record| record.path.ends_with("moon.mod.json"))
        .min_by_key(|record| record.path.matches('/').count())?;
    let text = fs::read_to_string(root.join(&manifest.path)).ok()?;
    let json: serde_json::Value = serde_json::from_str(&text).ok()?;
    json.get("name")?.as_str().map(str::to_string)
}

/// Extracts the package name and import list from a package manifest's JSON text.
///
/// Text that is not valid JSON yields `(None, [])`. The name is the string
/// `name` field, if any. Imports come from the `import` field, falling back
/// to `imports` when `import` is absent.
fn parse_moonbit_package_metadata(source: &str) -> (Option<String>, Vec<String>) {
    let json: serde_json::Value = match serde_json::from_str(source) {
        Ok(json) => json,
        Err(_) => return (None, Vec::new()),
    };

    let mut imports = Vec::new();
    let import_field = json.get("import").or_else(|| json.get("imports"));
    if let Some(field) = import_field {
        collect_moonbit_imports(field, &mut imports);
    }

    let name = json
        .get("name")
        .and_then(serde_json::Value::as_str)
        .map(str::to_owned);
    (name, imports)
}

/// Flattens a manifest import value into `imports`.
///
/// * A string is pushed as-is.
/// * An array is processed element by element, recursively.
/// * An object contributes one entry per field: the field's string value, or
///   the field's key when the value is not a string.
/// * Any other JSON value is ignored.
///
/// NOTE(review): for an object every field becomes an entry, so an entry such
/// as `{ "path": ..., "alias": ... }` records both strings. Confirm this is
/// the intended reading of the manifest format.
fn collect_moonbit_imports(value: &serde_json::Value, imports: &mut Vec<String>) {
    match value {
        serde_json::Value::String(path) => imports.push(path.to_owned()),
        serde_json::Value::Array(items) => items
            .iter()
            .for_each(|item| collect_moonbit_imports(item, imports)),
        serde_json::Value::Object(fields) => imports.extend(
            fields
                .iter()
                .map(|(key, field)| field.as_str().unwrap_or(key).to_string()),
        ),
        _ => {}
    }
}

/// Derives a package name for a manifest that does not declare one.
///
/// With a module name the result is `module` for the root directory (empty
/// `dir`) and `module/dir` otherwise. Without one it is the directory itself,
/// or the placeholder `moonbit-package` for the root directory.
fn moonbit_package_name_from_dir(module_name: Option<&str>, dir: &str) -> String {
    let Some(module) = module_name else {
        return if dir.is_empty() {
            "moonbit-package".to_owned()
        } else {
            dir.to_owned()
        };
    };
    if dir.is_empty() {
        module.to_owned()
    } else {
        format!("{module}/{dir}")
    }
}

/// True when `file` ends with one of the extensions `.mbt`, `.mbti`, or `.mbt.md`.
fn is_moonbit_source_file(file: &str) -> bool {
    const EXTENSIONS: [&str; 3] = [".mbt", ".mbti", ".mbt.md"];
    EXTENSIONS.iter().any(|extension| file.ends_with(*extension))
}

/// True when `file` ends with a package manifest name: `moon.pkg.json` or `moon.pkg`.
fn is_moonbit_package_file(file: &str) -> bool {
    file.strip_suffix("moon.pkg.json")
        .or_else(|| file.strip_suffix("moon.pkg"))
        .is_some()
}

/// Finds the package directory that most closely encloses `file`.
///
/// Candidate directories are the parents of every package manifest in
/// `indexed_files`; the choice among them is made by
/// `moonbit_package_dir_from_dirs`.
fn moonbit_package_dir(file: &str, indexed_files: &[FileRecord]) -> Option<String> {
    let mut manifest_dirs: Vec<String> = Vec::new();
    for record in indexed_files {
        if is_moonbit_package_file(&record.path) {
            manifest_dirs.push(parent_dir(&record.path));
        }
    }
    moonbit_package_dir_from_dirs(file, &manifest_dirs)
}

/// Picks the longest directory in `dirs` that encloses `file`.
///
/// A directory encloses `file` when it is empty (the root), equals `file`, or
/// is followed by `/` at the start of `file`. Among several candidates the
/// longest wins; on equal length the earliest one is kept. Returns `None`
/// when no directory qualifies.
fn moonbit_package_dir_from_dirs<'a, I>(file: &str, dirs: I) -> Option<String>
where
    I: IntoIterator<Item = &'a String>,
{
    dirs.into_iter()
        .filter(|dir| {
            dir.is_empty()
                || file == dir.as_str()
                || file
                    .strip_prefix(dir.as_str())
                    .is_some_and(|rest| rest.starts_with('/'))
        })
        .fold(None::<&String>, |best, dir| match best {
            // Only a strictly longer directory replaces the current pick.
            Some(current) if dir.len() <= current.len() => Some(current),
            _ => Some(dir),
        })
        .cloned()
}

/// Returns everything before the last `/` of `file`, or an empty string when
/// the path has no `/`.
fn parent_dir(file: &str) -> String {
    match file.rfind('/') {
        Some(last_slash) => file[..last_slash].to_string(),
        None => String::new(),
    }
}

/// Finds Rust test files whose name mentions the changed file's stem.
///
/// Returns nothing unless `file` is an indexed, non-test Rust file named
/// `<stem>.rs` with a stem of at least three bytes. Candidates are the indexed
/// Rust test files accepted by `rust_test_path_matches_stem`, in index order.
fn rust_name_heuristic_tests(file: &str, indexed_files: &[FileRecord]) -> Vec<String> {
    let changed_is_rust = indexed_files
        .iter()
        .find(|record| record.path == file)
        .is_some_and(|changed| changed.language == Language::Rust);
    if !changed_is_rust || is_test_file(file) {
        return Vec::new();
    }

    let file_name = file.rsplit('/').next().unwrap_or(file);
    let stem = match file_name.strip_suffix(".rs") {
        // Very short stems would match far too many unrelated test names.
        Some(stem) if stem.len() >= 3 => stem,
        _ => return Vec::new(),
    };

    let mut matching_tests = Vec::new();
    for record in indexed_files {
        let is_rust_test = record.language == Language::Rust && is_test_file(&record.path);
        if is_rust_test && rust_test_path_matches_stem(&record.path, stem) {
            matching_tests.push(record.path.clone());
        }
    }
    matching_tests
}

/// Checks whether the `.rs` file name of `test_path` mentions `stem`.
///
/// The name, with `.rs` removed, matches when it equals `stem`, ends with
/// `_<stem>`, starts with `<stem>_`, or contains `_<stem>_`. A path that does
/// not end in `.rs` never matches.
fn rust_test_path_matches_stem(test_path: &str, stem: &str) -> bool {
    let file_name = match test_path.rfind('/') {
        Some(slash) => &test_path[slash + 1..],
        None => test_path,
    };
    let Some(name) = file_name.strip_suffix(".rs") else {
        return false;
    };
    if name == stem {
        return true;
    }
    let has_suffix = name
        .strip_suffix(stem)
        .is_some_and(|head| head.ends_with('_'));
    let has_prefix = name
        .strip_prefix(stem)
        .is_some_and(|tail| tail.starts_with('_'));
    has_suffix || has_prefix || name.contains(&format!("_{stem}_"))
}

/// Finds other Rust files in the changed file's crate that carry tests.
///
/// Returns nothing unless `file` is an indexed, non-test Rust file for which
/// `rust_crate_root` yields a crate root. A sibling qualifies when it shares
/// that crate root and is either a test file by path or contains an inline
/// test marker on disk. Results keep index order and never include `file`.
fn rust_workspace_heuristic_tests(
    root: &Path,
    file: &str,
    indexed_files: &[FileRecord],
) -> Vec<String> {
    let changed_is_rust = indexed_files
        .iter()
        .find(|record| record.path == file)
        .is_some_and(|changed| changed.language == Language::Rust);
    if !changed_is_rust || is_test_file(file) {
        return Vec::new();
    }
    let Some(crate_root) = rust_crate_root(file) else {
        return Vec::new();
    };

    let mut related = Vec::new();
    for record in indexed_files {
        let is_crate_sibling = record.language == Language::Rust
            && record.path != file
            && rust_crate_root(&record.path).as_deref() == Some(crate_root.as_str());
        // The disk read for inline tests only happens for siblings that are
        // not already test files by path.
        if is_crate_sibling
            && (is_test_file(&record.path) || rust_file_contains_inline_tests(root, &record.path))
        {
            related.push(record.path.clone());
        }
    }
    related
}

/// Guesses the crate directory that contains `file`.
///
/// A path starting with `crates/<x>` maps to `crates/<x>`. Otherwise the
/// result is everything before the first `src` path component, which is the
/// empty string for a path that starts with `src/`. Returns `None` when
/// neither rule applies.
fn rust_crate_root(file: &str) -> Option<String> {
    let mut leading = file.split('/');
    if let (Some("crates"), Some(member)) = (leading.next(), leading.next()) {
        return Some(format!("crates/{member}"));
    }

    let segments: Vec<&str> = file.split('/').collect();
    let src_at = segments.iter().position(|segment| *segment == "src")?;
    Some(segments[..src_at].join("/"))
}

fn rust_file_contains_inline_tests(root: &Path, file: &str) -> bool {
    fs::read_to_string(root.join(file))
        .map(|text| text.contains("#[cfg(test)]") || text.contains("#[test]"))
        .unwrap_or(false)
}