// codegraph/lib.rs

1pub mod config;
2pub mod db;
3pub mod extraction;
4pub mod graph;
5pub mod installer;
6pub mod mcp;
7pub mod types;
8pub mod watcher;
9
10use anyhow::{anyhow, Context, Result};
11use config::{load_config, save_config, CodeGraphConfig};
12use db::Database;
13use extraction::{detect_language, detect_parse_error, extract_from_source, should_include_file};
14use graph::{GraphTraverser, Subgraph};
15use sha2::{Digest, Sha256};
16use std::collections::{BTreeMap, BTreeSet};
17use std::fs;
18use std::path::{Path, PathBuf};
19use types::{
20    AffectedDebugEntry, AffectedMatchSources, AffectedReport, ContextFileSummary, ContextMatch,
21    ContextReport, ContextSymbolSummary, EdgeKind, ExploreRelationship, ExploreReport,
22    ExploreSourceFile, ExploreSourceSection, FileLanguageGroup, FileListEntry, FileListFormat,
23    FileListOptions, FileListReport, FileRecord, FileTreeEntry, GraphPath, GraphStats, IndexError,
24    IndexErrorCategory, IndexResult, Language, Node, NodeEdge, SearchOptions, SearchResult,
25};
26
/// Name of the per-project directory that holds CodeGraph state.
pub const CODEGRAPH_DIR: &str = ".codegraph";
/// File name of the database stored inside [`CODEGRAPH_DIR`].
pub const DATABASE_FILE: &str = "codegraph.db";
29
30pub struct CodeGraph {
31    root: PathBuf,
32    config: CodeGraphConfig,
33    db: Database,
34}
35
36impl CodeGraph {
37    pub fn init(root: impl AsRef<Path>) -> Result<Self> {
38        let root = root
39            .as_ref()
40            .canonicalize()
41            .unwrap_or_else(|_| root.as_ref().to_path_buf());
42        let dir = root.join(CODEGRAPH_DIR);
43        if dir.exists() {
44            return Err(anyhow!(
45                "CodeGraph already initialized in {}",
46                root.display()
47            ));
48        }
49        fs::create_dir_all(&dir).with_context(|| format!("creating {}", dir.display()))?;
50        let config = CodeGraphConfig::default_for_root(".");
51        save_config(&root, &config)?;
52        let db = Database::initialize(dir.join(DATABASE_FILE))?;
53        Ok(Self { root, config, db })
54    }
55
56    pub fn open(root: impl AsRef<Path>) -> Result<Self> {
57        let root = find_nearest_codegraph_root(root.as_ref())
58            .ok_or_else(|| anyhow!("CodeGraph not initialized in {}", root.as_ref().display()))?;
59        let config = load_config(&root)?;
60        let db = Database::open(root.join(CODEGRAPH_DIR).join(DATABASE_FILE))?;
61        Ok(Self { root, config, db })
62    }
63
64    pub fn root(&self) -> &Path {
65        &self.root
66    }
67
68    pub fn config(&self) -> &CodeGraphConfig {
69        &self.config
70    }
71
72    pub fn index_all(&mut self) -> Result<IndexResult> {
73        let start = std::time::Instant::now();
74        self.db.clear_all()?;
75        let files = self.scan_files()?;
76        let mut result = IndexResult::default();
77
78        for path in files {
79            self.index_changed_file(&path, &mut result)?;
80        }
81
82        self.db.clear_resolved_reference_edges()?;
83        self.db.resolve_references(&self.root)?;
84        result.edges_created = self.db.edge_count()?;
85        result.success = result.files_errored == 0;
86        result.duration_ms = start.elapsed().as_millis() as i64;
87        Ok(result)
88    }
89
90    pub fn sync(&mut self) -> Result<IndexResult> {
91        let start = std::time::Instant::now();
92        let files = self.scan_files()?;
93        let current_paths = files
94            .iter()
95            .map(|path| normalized_path(path))
96            .collect::<BTreeSet<_>>();
97        let existing = self
98            .db
99            .get_all_files()?
100            .into_iter()
101            .map(|file| (file.path.clone(), file))
102            .collect::<BTreeMap<_, _>>();
103        let mut result = IndexResult::default();
104        let mut changed = false;
105
106        for path in existing.keys() {
107            if !current_paths.contains(path) {
108                self.db.delete_file_index(path)?;
109                result.files_deleted += 1;
110                changed = true;
111            }
112        }
113
114        for path in files {
115            let path_key = normalized_path(&path);
116            let full = self.root.join(&path);
117            let content = match fs::read_to_string(&full) {
118                Ok(content) => content,
119                Err(err) => {
120                    push_index_error(
121                        &mut result,
122                        categorize_read_error(&err),
123                        &path,
124                        err.to_string(),
125                    );
126                    continue;
127                }
128            };
129            let hash = content_hash(&content);
130            if existing
131                .get(&path_key)
132                .is_some_and(|file| file.content_hash == hash)
133            {
134                result.files_skipped += 1;
135                continue;
136            }
137            self.index_changed_file_with_content(&path, content, Some(hash), &mut result)?;
138            changed = true;
139        }
140
141        if changed {
142            self.db.clear_resolved_reference_edges()?;
143            self.db.resolve_references(&self.root)?;
144        }
145        result.edges_created = self.db.edge_count()?;
146        result.success = result.files_errored == 0;
147        result.duration_ms = start.elapsed().as_millis() as i64;
148        Ok(result)
149    }
150
151    pub fn stats(&self) -> Result<GraphStats> {
152        self.db.stats()
153    }
154
155    pub fn search_nodes(&self, query: &str, options: SearchOptions) -> Result<Vec<SearchResult>> {
156        self.db.search_nodes(query, options)
157    }
158
159    pub fn get_node(&self, id: &str) -> Result<Option<Node>> {
160        self.db.get_node(id)
161    }
162
163    pub fn get_callers(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
164        GraphTraverser::new(&self.db).get_callers(node_id, max_depth)
165    }
166
167    pub fn get_callees(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
168        GraphTraverser::new(&self.db).get_callees(node_id, max_depth)
169    }
170
171    pub fn get_impact_radius(&self, node_id: &str, max_depth: usize) -> Result<Subgraph> {
172        GraphTraverser::new(&self.db).get_impact_radius(node_id, max_depth)
173    }
174
175    pub fn find_paths(
176        &self,
177        from_node_id: &str,
178        to_node_id: &str,
179        max_depth: usize,
180        max_paths: usize,
181    ) -> Result<Vec<GraphPath>> {
182        GraphTraverser::new(&self.db).find_paths(from_node_id, to_node_id, max_depth, max_paths)
183    }
184
185    pub fn get_file_dependents(&self, file_path: &str) -> Result<Vec<String>> {
186        self.db.get_file_dependents(file_path)
187    }
188
189    pub fn get_all_files(&self) -> Result<Vec<FileRecord>> {
190        self.db.get_all_files()
191    }
192
193    pub fn list_files(&self, options: FileListOptions) -> Result<FileListReport> {
194        let max_depth = options.max_depth.map(|depth| depth.clamp(1, 20));
195        let mut files = self
196            .get_all_files()?
197            .into_iter()
198            .filter(|file| {
199                options
200                    .path_filter
201                    .as_deref()
202                    .map(|path| file_path_matches_filter(path, &file.path))
203                    .unwrap_or(true)
204            })
205            .filter(|file| {
206                options
207                    .pattern
208                    .as_deref()
209                    .map(|pattern| file_pattern_matches(pattern, &file.path))
210                    .unwrap_or(true)
211            })
212            .filter(|file| {
213                max_depth
214                    .map(|depth| file.path.split('/').count() <= depth)
215                    .unwrap_or(true)
216            })
217            .map(|file| file_list_entry(file, options.include_metadata))
218            .collect::<Vec<_>>();
219        files.sort_by(|a, b| a.path.cmp(&b.path));
220
221        let groups = if options.format == FileListFormat::Grouped {
222            grouped_file_entries(&files)
223        } else {
224            Vec::new()
225        };
226        let tree = if options.format == FileListFormat::Tree {
227            build_file_tree(&files)
228        } else {
229            Vec::new()
230        };
231        let format = match options.format {
232            FileListFormat::Grouped => "grouped",
233            FileListFormat::Flat => "flat",
234            FileListFormat::Tree => "tree",
235        }
236        .to_string();
237
238        Ok(FileListReport {
239            format,
240            path_filter: options.path_filter,
241            pattern: options.pattern,
242            include_metadata: options.include_metadata,
243            max_depth,
244            total_files: files.len(),
245            files,
246            groups,
247            tree,
248        })
249    }
250
251    pub fn build_affected_report(&self, files: &[String]) -> Result<AffectedReport> {
252        let indexed_files = self.get_all_files()?;
253        let moonbit_packages = MoonBitPackageGraph::from_root(&self.root, &indexed_files);
254        let mut affected = BTreeSet::new();
255        let mut debug = Vec::new();
256        let mut warnings = Vec::new();
257
258        for file in files {
259            if is_test_file(file) {
260                affected.insert(file.clone());
261                debug.push(AffectedDebugEntry {
262                    changed_file: file.clone(),
263                    reason: "changed file is a test file".to_string(),
264                    matched_tests: vec![file.clone()],
265                    matched_by: AffectedMatchSources {
266                        direct_test_input: vec![file.clone()],
267                        import_dependents: Vec::new(),
268                        moonbit_same_package: Vec::new(),
269                        moonbit_package_dependents: Vec::new(),
270                        rust_name_heuristic: Vec::new(),
271                        rust_workspace_heuristic: Vec::new(),
272                    },
273                });
274                continue;
275            }
276
277            let mut matched = BTreeSet::new();
278            let mut import_dependents = BTreeSet::new();
279            for dep in self.get_file_dependents(file)? {
280                if is_test_file(&dep) {
281                    import_dependents.insert(dep.clone());
282                    matched.insert(dep.clone());
283                    affected.insert(dep);
284                }
285            }
286
287            let moonbit_tests: BTreeSet<String> = moonbit_same_package_tests(file, &indexed_files)
288                .into_iter()
289                .collect();
290            for test in &moonbit_tests {
291                matched.insert(test.clone());
292                affected.insert(test.clone());
293            }
294            let moonbit_package_tests: BTreeSet<String> = moonbit_packages
295                .dependent_package_tests(file)
296                .into_iter()
297                .collect();
298            for test in &moonbit_package_tests {
299                matched.insert(test.clone());
300                affected.insert(test.clone());
301            }
302            let rust_tests: BTreeSet<String> = rust_name_heuristic_tests(file, &indexed_files)
303                .into_iter()
304                .collect();
305            for test in &rust_tests {
306                matched.insert(test.clone());
307                affected.insert(test.clone());
308            }
309            let rust_workspace_tests: BTreeSet<String> =
310                rust_workspace_heuristic_tests(&self.root, file, &indexed_files)
311                    .into_iter()
312                    .collect();
313            for test in &rust_workspace_tests {
314                matched.insert(test.clone());
315                affected.insert(test.clone());
316            }
317
318            if matched.is_empty() {
319                warnings.push(format!(
320                    "{file}: no import-dependent tests, MoonBit same-package tests, MoonBit package-dependent tests, Rust name-heuristic tests, or Rust workspace tests found"
321                ));
322            }
323            debug.push(AffectedDebugEntry {
324                changed_file: file.clone(),
325                reason: if matched.is_empty() {
326                    "no import-dependent tests, MoonBit same-package tests, MoonBit package-dependent tests, Rust name-heuristic tests, or Rust workspace tests found".to_string()
327                } else {
328                    "matched import-dependent tests, MoonBit same-package tests, MoonBit package-dependent tests, Rust name-heuristic tests, and/or Rust workspace tests".to_string()
329                },
330                matched_tests: matched.into_iter().collect(),
331                matched_by: AffectedMatchSources {
332                    direct_test_input: Vec::new(),
333                    import_dependents: import_dependents.into_iter().collect(),
334                    moonbit_same_package: moonbit_tests.into_iter().collect(),
335                    moonbit_package_dependents: moonbit_package_tests.into_iter().collect(),
336                    rust_name_heuristic: rust_tests.into_iter().collect(),
337                    rust_workspace_heuristic: rust_workspace_tests.into_iter().collect(),
338                },
339            });
340        }
341
342        Ok(AffectedReport {
343            changed_files: files.to_vec(),
344            affected_tests: affected.into_iter().collect(),
345            debug,
346            warnings,
347        })
348    }
349
350    pub fn build_context(&self, task: &str, max_nodes: i64, include_code: bool) -> Result<String> {
351        let report = self.build_context_report(task, max_nodes, include_code)?;
352        let mut out = format!("## Context: {task}\n\n");
353        if report.matches.is_empty() {
354            for warning in &report.warnings {
355                out.push_str(warning);
356                out.push('\n');
357            }
358            return Ok(out);
359        }
360
361        for result in report.matches {
362            let n = result.node;
363            out.push_str(&format!(
364                "- `{}` `{}` at `{}:{}`",
365                n.kind, n.name, n.file_path, n.start_line
366            ));
367            if let Some(sig) = n.signature.as_deref() {
368                out.push_str(&format!(" — `{}`", sig.replace('\n', " ")));
369            }
370            out.push('\n');
371            if let Some(code) = result.code {
372                out.push_str("\n```");
373                out.push_str(n.language.as_str());
374                out.push('\n');
375                out.push_str(&code);
376                if !code.ends_with('\n') {
377                    out.push('\n');
378                }
379                out.push_str("```\n\n");
380            }
381        }
382        Ok(out)
383    }
384
385    pub fn build_context_report(
386        &self,
387        task: &str,
388        max_nodes: i64,
389        include_code: bool,
390    ) -> Result<ContextReport> {
391        let query = task.trim().to_string();
392        let search_terms = context_search_terms(task);
393        let results = self.find_context_nodes(&search_terms, max_nodes)?;
394        let mut matches = Vec::new();
395        let mut files: BTreeMap<String, ContextFileSummary> = BTreeMap::new();
396        let mut symbols = Vec::new();
397
398        for (result, search_term) in results {
399            let code = if include_code {
400                self.read_node_source(&result.node).ok()
401            } else {
402                None
403            };
404            let file = files
405                .entry(result.node.file_path.clone())
406                .or_insert_with(|| ContextFileSummary {
407                    path: result.node.file_path.clone(),
408                    language: result.node.language,
409                    match_count: 0,
410                    symbols: Vec::new(),
411                });
412            file.match_count += 1;
413            if !file.symbols.iter().any(|name| name == &result.node.name) {
414                file.symbols.push(result.node.name.clone());
415            }
416            symbols.push(ContextSymbolSummary {
417                name: result.node.name.clone(),
418                kind: result.node.kind,
419                file_path: result.node.file_path.clone(),
420                start_line: result.node.start_line,
421            });
422            matches.push(ContextMatch {
423                reason: context_match_reason(task, &search_term),
424                search_term,
425                score: result.score,
426                node: result.node,
427                code,
428            });
429        }
430
431        let mut warnings = Vec::new();
432        if matches.is_empty() {
433            warnings.push("No matching symbols or files were found.".to_string());
434            warnings.push(
435                "Try a concrete symbol name, file name, package/module name, or a shorter code term. For candidate discovery, run `cgz query --json <term>`."
436                    .to_string(),
437            );
438        }
439
440        Ok(ContextReport {
441            query,
442            search_terms,
443            matches,
444            files: files.into_values().collect(),
445            symbols,
446            warnings,
447        })
448    }
449
450    pub fn build_explore_report(&self, query: &str, max_files: usize) -> Result<ExploreReport> {
451        let max_files = max_files.clamp(1, 20);
452        let stats = self.stats()?;
453        let max_nodes = (max_files as i64 * 6).clamp(6, 120);
454        let context = self.build_context_report(query, max_nodes, true)?;
455        let mut source_files: BTreeMap<String, ExploreSourceFile> = BTreeMap::new();
456        let mut relationships = Vec::new();
457        let mut additional_files = BTreeSet::new();
458        let mut seen_relationships = BTreeSet::new();
459        let mut truncated = false;
460        let mut warnings = context.warnings.clone();
461
462        for matched in &context.matches {
463            let file = source_files
464                .entry(matched.node.file_path.clone())
465                .or_insert_with(|| ExploreSourceFile {
466                    path: matched.node.file_path.clone(),
467                    language: matched.node.language,
468                    sections: Vec::new(),
469                });
470            if file.sections.len() < 4 {
471                let (code, section_truncated) =
472                    bounded_source_section(matched.code.as_deref().unwrap_or_default(), 4_000);
473                truncated |= section_truncated;
474                file.sections.push(ExploreSourceSection {
475                    symbol: matched.node.name.clone(),
476                    kind: matched.node.kind,
477                    start_line: matched.node.start_line,
478                    end_line: matched.node.end_line,
479                    reason: matched.reason.clone(),
480                    code,
481                    truncated: section_truncated,
482                });
483            } else {
484                truncated = true;
485            }
486
487            self.collect_explore_relationships(
488                &matched.node,
489                &mut relationships,
490                &mut seen_relationships,
491                &mut additional_files,
492            )?;
493        }
494
495        let mut source_files = source_files.into_values().collect::<Vec<_>>();
496        source_files.sort_by(|a, b| a.path.cmp(&b.path));
497        if source_files.len() > max_files {
498            for file in source_files.drain(max_files..) {
499                additional_files.insert(file.path);
500            }
501            truncated = true;
502        }
503
504        let source_paths = source_files
505            .iter()
506            .map(|file| file.path.as_str())
507            .collect::<BTreeSet<_>>();
508        let additional_files = additional_files
509            .into_iter()
510            .filter(|file| !source_paths.contains(file.as_str()))
511            .take(max_files)
512            .collect::<Vec<_>>();
513
514        relationships.sort_by(|a, b| {
515            a.file_path
516                .cmp(&b.file_path)
517                .then_with(|| a.source.cmp(&b.source))
518                .then_with(|| a.kind.as_str().cmp(b.kind.as_str()))
519                .then_with(|| a.target.cmp(&b.target))
520        });
521        if relationships.len() > max_files * 4 {
522            relationships.truncate(max_files * 4);
523            truncated = true;
524        }
525
526        if truncated {
527            warnings.push("Explore output was truncated to fit the configured source and relationship budgets.".to_string());
528        }
529
530        Ok(ExploreReport {
531            query: context.query,
532            max_files,
533            budget_guidance: explore_budget_guidance(stats.file_count),
534            source_files,
535            relationships,
536            additional_files,
537            warnings,
538            truncated,
539            truncated_reason: truncated.then(|| {
540                "Some source sections, files, or relationships exceeded the explore budget."
541                    .to_string()
542            }),
543        })
544    }
545
546    fn collect_explore_relationships(
547        &self,
548        node: &Node,
549        relationships: &mut Vec<ExploreRelationship>,
550        seen: &mut BTreeSet<String>,
551        additional_files: &mut BTreeSet<String>,
552    ) -> Result<()> {
553        for edge in self.get_callees(&node.id, 1)?.into_iter().take(4) {
554            if edge.edge.kind != EdgeKind::Contains {
555                push_explore_relationship(
556                    node,
557                    edge,
558                    "outgoing",
559                    relationships,
560                    seen,
561                    additional_files,
562                );
563            }
564        }
565        for edge in self.get_callers(&node.id, 1)?.into_iter().take(4) {
566            if edge.edge.kind != EdgeKind::Contains {
567                push_explore_relationship(
568                    node,
569                    edge,
570                    "incoming",
571                    relationships,
572                    seen,
573                    additional_files,
574                );
575            }
576        }
577        for file in self
578            .get_file_dependents(&node.file_path)?
579            .into_iter()
580            .take(4)
581        {
582            additional_files.insert(file);
583        }
584        Ok(())
585    }
586
587    fn find_context_nodes(
588        &self,
589        search_terms: &[String],
590        max_nodes: i64,
591    ) -> Result<Vec<(SearchResult, String)>> {
592        let limit = max_nodes.max(1);
593        let mut out = Vec::new();
594        let mut seen = BTreeSet::new();
595
596        for term in search_terms {
597            if out.len() >= limit as usize {
598                break;
599            }
600            let remaining = limit - out.len() as i64;
601            let results = self.search_nodes(
602                term,
603                SearchOptions {
604                    limit: remaining,
605                    ..Default::default()
606                },
607            )?;
608            for result in results {
609                if seen.insert(result.node.id.clone()) {
610                    out.push((result, term.clone()));
611                    if out.len() >= limit as usize {
612                        break;
613                    }
614                }
615            }
616        }
617
618        Ok(out)
619    }
620
621    pub fn read_node_source(&self, node: &Node) -> Result<String> {
622        let full = self.root.join(&node.file_path);
623        let text =
624            fs::read_to_string(&full).with_context(|| format!("reading {}", full.display()))?;
625        let lines: Vec<&str> = text.lines().collect();
626        let start = (node.start_line.saturating_sub(1) as usize).min(lines.len());
627        let end = (node.end_line.max(node.start_line) as usize).min(lines.len());
628        Ok(lines[start..end].join("\n"))
629    }
630
631    pub fn close(self) {}
632
633    fn index_changed_file(&self, path: &Path, result: &mut IndexResult) -> Result<()> {
634        let full = self.root.join(path);
635        let content = match fs::read_to_string(&full) {
636            Ok(content) => content,
637            Err(err) => {
638                push_index_error(result, categorize_read_error(&err), path, err.to_string());
639                return Ok(());
640            }
641        };
642        self.index_changed_file_with_content(path, content, None, result)
643    }
644
645    fn index_changed_file_with_content(
646        &self,
647        path: &Path,
648        content: String,
649        hash: Option<String>,
650        result: &mut IndexResult,
651    ) -> Result<()> {
652        let path_key = normalized_path(path);
653        let lang = detect_language(path, &content);
654        if lang.is_unknown() {
655            self.db.delete_file_index(&path_key)?;
656            push_index_error(
657                result,
658                IndexErrorCategory::Unsupported,
659                path,
660                "unsupported file type".to_string(),
661            );
662            return Ok(());
663        }
664        if detect_parse_error(&content, lang) {
665            self.db.delete_file_index(&path_key)?;
666            push_index_error(
667                result,
668                IndexErrorCategory::Parse,
669                path,
670                format!("could not parse {lang} syntax"),
671            );
672            return Ok(());
673        }
674        let full = self.root.join(path);
675        let metadata = fs::metadata(&full)?;
676        let extraction = extract_from_source(path, &content, lang);
677        let file = FileRecord {
678            path: path_key,
679            content_hash: hash.unwrap_or_else(|| content_hash(&content)),
680            language: lang,
681            size: metadata.len(),
682            modified_at: metadata
683                .modified()
684                .ok()
685                .and_then(system_time_ms)
686                .unwrap_or_default(),
687            indexed_at: now_ms(),
688            node_count: extraction.nodes.len() as i64,
689        };
690        self.db.replace_file_index(
691            &file,
692            &extraction.nodes,
693            &extraction.edges,
694            &extraction.unresolved_references,
695        )?;
696        result.files_indexed += 1;
697        result.nodes_created += extraction.nodes.len() as i64;
698        result.edges_created += extraction.edges.len() as i64;
699        Ok(())
700    }
701
702    fn scan_files(&self) -> Result<Vec<PathBuf>> {
703        let mut out = Vec::new();
704        let walker = ignore::WalkBuilder::new(&self.root)
705            .hidden(false)
706            .git_ignore(true)
707            .git_global(true)
708            .git_exclude(true)
709            .build();
710        for entry in walker {
711            let entry = entry?;
712            if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
713                continue;
714            }
715            let rel = entry
716                .path()
717                .strip_prefix(&self.root)
718                .unwrap_or(entry.path())
719                .to_path_buf();
720            if rel.components().any(|c| c.as_os_str() == CODEGRAPH_DIR) {
721                continue;
722            }
723            if should_include_file(&rel, &self.config) {
724                out.push(rel);
725            }
726        }
727        out.sort();
728        Ok(out)
729    }
730}
731
732fn categorize_read_error(err: &std::io::Error) -> IndexErrorCategory {
733    if err.kind() == std::io::ErrorKind::WouldBlock {
734        IndexErrorCategory::Lock
735    } else {
736        IndexErrorCategory::Read
737    }
738}
739
740fn push_index_error(
741    result: &mut IndexResult,
742    category: IndexErrorCategory,
743    path: &Path,
744    message: String,
745) {
746    result.files_errored += 1;
747    result.errors.push(IndexError {
748        category,
749        path: path.display().to_string(),
750        message,
751    });
752}
753
754fn file_list_entry(file: FileRecord, include_metadata: bool) -> FileListEntry {
755    FileListEntry {
756        path: file.path,
757        language: file.language,
758        node_count: file.node_count,
759        size: include_metadata.then_some(file.size),
760        modified_at: include_metadata.then_some(file.modified_at),
761        indexed_at: include_metadata.then_some(file.indexed_at),
762    }
763}
764
/// Converts `path` to a string (lossily for non-UTF-8 data) with every
/// backslash replaced by a forward slash.
fn normalized_path(path: &Path) -> String {
    path.to_string_lossy().replace('\\', "/")
}
768
/// Returns true when `path` equals `filter` or lies beneath it as a directory.
///
/// Leading and trailing `/` on `filter` are ignored; an empty filter matches
/// every path. Matching happens on whole path segments, so `src` does not
/// match `srcs/lib.rs`.
fn file_path_matches_filter(filter: &str, path: &str) -> bool {
    let filter = filter.trim_matches('/');
    filter.is_empty()
        || path
            .strip_prefix(filter)
            // Either an exact match or the filter ends on a segment boundary.
            .is_some_and(|rest| rest.is_empty() || rest.starts_with('/'))
}
773
774fn grouped_file_entries(files: &[FileListEntry]) -> Vec<FileLanguageGroup> {
775    let mut grouped: BTreeMap<String, (Language, Vec<FileListEntry>)> = BTreeMap::new();
776    for file in files {
777        grouped
778            .entry(file.language.as_str().to_string())
779            .or_insert_with(|| (file.language, Vec::new()))
780            .1
781            .push(file.clone());
782    }
783    grouped
784        .into_values()
785        .map(|(language, files)| FileLanguageGroup {
786            language,
787            count: files.len(),
788            files,
789        })
790        .collect()
791}
792
793fn build_file_tree(files: &[FileListEntry]) -> Vec<FileTreeEntry> {
794    let mut roots = Vec::new();
795    for file in files {
796        insert_tree_file(
797            &mut roots,
798            file,
799            &file.path.split('/').collect::<Vec<_>>(),
800            0,
801            "",
802        );
803    }
804    roots
805}
806
807fn insert_tree_file(
808    entries: &mut Vec<FileTreeEntry>,
809    file: &FileListEntry,
810    parts: &[&str],
811    index: usize,
812    parent: &str,
813) {
814    let Some(name) = parts.get(index) else {
815        return;
816    };
817    let path = if parent.is_empty() {
818        (*name).to_string()
819    } else {
820        format!("{parent}/{name}")
821    };
822    let is_file = index + 1 == parts.len();
823    let pos = entries
824        .iter()
825        .position(|entry| entry.name == *name && entry.kind == if is_file { "file" } else { "dir" })
826        .unwrap_or_else(|| {
827            entries.push(FileTreeEntry {
828                name: (*name).to_string(),
829                path: path.clone(),
830                kind: if is_file { "file" } else { "dir" }.to_string(),
831                language: is_file.then_some(file.language),
832                node_count: is_file.then_some(file.node_count),
833                size: file.size.filter(|_| is_file),
834                children: Vec::new(),
835            });
836            entries.len() - 1
837        });
838    if !is_file {
839        insert_tree_file(&mut entries[pos].children, file, parts, index + 1, &path);
840    }
841    entries.sort_by(|a, b| {
842        a.kind
843            .cmp(&b.kind)
844            .then_with(|| a.name.cmp(&b.name))
845            .then_with(|| a.path.cmp(&b.path))
846    });
847}
848
/// Tests `path` against a simple file pattern.
///
/// * An empty pattern matches everything.
/// * `*.ext` and `**/*.ext` (with no further `*` in `ext`) match paths ending
///   in `.ext`.
/// * Any other pattern containing `*` is split on `*`; the literal pieces must
///   occur in order, the first piece must be a prefix of the path unless the
///   pattern starts with `*`, and the last piece must be a suffix unless the
///   pattern ends with `*`. `*` may span `/`.
/// * A pattern without `*` matches when it is a substring of the path.
fn file_pattern_matches(pattern: &str, path: &str) -> bool {
    if pattern.is_empty() {
        return true;
    }
    // The extension shortcut is valid only for a literal extension; patterns
    // such as `*.test.*` must go through the general wildcard matcher below.
    let literal_extension = pattern
        .strip_prefix("**/*.")
        .or_else(|| pattern.strip_prefix("*."))
        .filter(|ext| !ext.contains('*'));
    if let Some(ext) = literal_extension {
        return path
            .strip_suffix(ext)
            .is_some_and(|stem| stem.ends_with('.'));
    }
    if !pattern.contains('*') {
        return path.contains(pattern);
    }

    let parts = pattern.split('*').collect::<Vec<_>>();
    let mut rest = path;
    for (idx, &part) in parts.iter().enumerate() {
        if part.is_empty() {
            continue;
        }
        // A non-empty first piece means the pattern is anchored at the start.
        if idx == 0 && !rest.starts_with(part) {
            return false;
        }
        let Some(found) = rest.find(part) else {
            return false;
        };
        rest = &rest[found + part.len()..];
    }
    pattern.ends_with('*') || parts.last().is_some_and(|&suffix| path.ends_with(suffix))
}
878
/// Limits `source` to at most `max_chars` characters.
///
/// Returns the text and a flag telling whether it was cut. When it is cut, a
/// `// [section truncated]` marker line is appended after the kept characters.
fn bounded_source_section(source: &str, max_chars: usize) -> (String, bool) {
    // The byte offset of character number `max_chars` exists only when the source
    // is longer than the limit, and is always a valid slicing boundary.
    match source.char_indices().nth(max_chars) {
        None => (source.to_owned(), false),
        Some((cut, _)) => {
            let mut clipped = source[..cut].to_owned();
            clipped.push_str("\n// [section truncated]");
            (clipped, true)
        }
    }
}
887
888fn push_explore_relationship(
889    root: &Node,
890    edge: NodeEdge,
891    direction: &str,
892    relationships: &mut Vec<ExploreRelationship>,
893    seen: &mut BTreeSet<String>,
894    additional_files: &mut BTreeSet<String>,
895) {
896    let key = format!(
897        "{}:{}:{}",
898        edge.edge.source,
899        edge.edge.kind.as_str(),
900        edge.edge.target
901    );
902    if !seen.insert(key) {
903        return;
904    }
905    if edge.node.file_path != root.file_path {
906        additional_files.insert(edge.node.file_path.clone());
907    }
908    let (source, target) = if direction == "outgoing" {
909        (root.name.clone(), edge.node.name.clone())
910    } else {
911        (edge.node.name.clone(), root.name.clone())
912    };
913    relationships.push(ExploreRelationship {
914        source,
915        target,
916        kind: edge.edge.kind,
917        file_path: edge.node.file_path,
918        direction: direction.to_string(),
919    });
920}
921
/// Returns a short hint on how many explore calls suit a project with
/// `file_count` indexed files.
///
/// Counts of 0 to 50 are "small", 51 to 250 are "medium", and every other value
/// (including negative ones) gets the "large" guidance.
fn explore_budget_guidance(file_count: i64) -> String {
    let advice = if (0..=50).contains(&file_count) {
        "Small project: one or two focused explore calls should usually be enough."
    } else if (51..=250).contains(&file_count) {
        "Medium project: use a few targeted explore calls around concrete symbols or files."
    } else {
        "Large project: keep explore calls narrow and follow up by file, symbol, or subsystem."
    };
    String::from(advice)
}
934
935fn context_search_terms(task: &str) -> Vec<String> {
936    let mut terms = Vec::new();
937    let mut seen = BTreeSet::new();
938    push_context_term(task.trim(), &mut terms, &mut seen);
939
940    for raw in task.split(|c: char| {
941        !(c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '/' || c == '.' || c == ':')
942    }) {
943        let term = raw.trim_matches(|c: char| {
944            !(c.is_ascii_alphanumeric() || c == '_' || c == '/' || c == '.' || c == ':')
945        });
946        if is_useful_context_term(term) {
947            push_context_term(term, &mut terms, &mut seen);
948        }
949    }
950
951    terms
952}
953
/// Describes why `search_term` was used to find a match for `task`.
///
/// The term counts as the full query when it equals the trimmed task, ignoring
/// ASCII case; otherwise it is reported as an extracted term.
fn context_match_reason(task: &str, search_term: &str) -> String {
    if !task.trim().eq_ignore_ascii_case(search_term) {
        return format!("matched extracted task term `{search_term}`");
    }
    String::from("matched the full context query")
}
961
/// Appends `term` to `terms` unless it is empty or was already added.
///
/// Duplicates are detected through `seen`, which stores the ASCII-lowercased
/// form of every accepted term; `terms` keeps the original spelling.
fn push_context_term(term: &str, terms: &mut Vec<String>, seen: &mut BTreeSet<String>) {
    // Short-circuiting keeps empty terms out of `seen` as well.
    let is_new = !term.is_empty() && seen.insert(term.to_ascii_lowercase());
    if is_new {
        terms.push(term.to_owned());
    }
}
971
972fn is_useful_context_term(term: &str) -> bool {
973    if term.len() < 3 {
974        return false;
975    }
976    if CONTEXT_STOP_WORDS.contains(&term.to_ascii_lowercase().as_str()) {
977        return false;
978    }
979    term.contains('_')
980        || term.contains('/')
981        || term.contains('.')
982        || term.contains(':')
983        || term.chars().any(|c| c.is_ascii_digit())
984        || term.chars().any(|c| c.is_ascii_uppercase())
985        || term.len() >= 5
986}
987
/// Lowercase words that `is_useful_context_term` rejects as search terms.
///
/// Candidates are ASCII-lowercased before being looked up in this list.
const CONTEXT_STOP_WORDS: &[&str] = &[
    "about",
    "after",
    "before",
    "build",
    "change",
    "check",
    "code",
    "context",
    "debug",
    "error",
    "feature",
    "files",
    "fix",
    "from",
    "handle",
    "how",
    "implement",
    "implemented",
    "invalid",
    "is",
    "issue",
    "order",
    "query",
    "return",
    "should",
    "task",
    "test",
    "tests",
    "update",
    "valid",
    "validation",
    "what",
    "when",
    "where",
    "which",
    "who",
    "why",
    "with",
];
1028
1029pub fn is_initialized(root: impl AsRef<Path>) -> bool {
1030    root.as_ref()
1031        .join(CODEGRAPH_DIR)
1032        .join(DATABASE_FILE)
1033        .exists()
1034}
1035
1036pub fn find_nearest_codegraph_root(start: impl AsRef<Path>) -> Option<PathBuf> {
1037    let mut cur = start
1038        .as_ref()
1039        .canonicalize()
1040        .unwrap_or_else(|_| start.as_ref().to_path_buf());
1041    if cur.is_file() {
1042        cur.pop();
1043    }
1044    loop {
1045        if is_initialized(&cur) {
1046            return Some(cur);
1047        }
1048        if !cur.pop() {
1049            return None;
1050        }
1051    }
1052}
1053
1054fn content_hash(content: &str) -> String {
1055    let mut h = Sha256::new();
1056    h.update(content.as_bytes());
1057    format!("{:x}", h.finalize())
1058}
1059
1060fn now_ms() -> i64 {
1061    system_time_ms(std::time::SystemTime::now()).unwrap_or_default()
1062}
1063
/// Converts `t` into whole milliseconds since the Unix epoch.
///
/// Returns `None` when `t` lies before the epoch or when the millisecond count
/// does not fit in an `i64`.
fn system_time_ms(t: std::time::SystemTime) -> Option<i64> {
    let since_epoch = t.duration_since(std::time::UNIX_EPOCH).ok()?;
    // `as_millis` yields a `u128`; convert with a check instead of an `as` cast
    // so an out-of-range timestamp becomes `None` rather than a wrapped value.
    i64::try_from(since_epoch.as_millis()).ok()
}
1069
/// Heuristically decides whether the `/`-separated path `file` is a test file.
///
/// A path counts as a test when any of the following holds:
/// - it ends with `.mbt.md`;
/// - its file name ends with `_test.mbt` or `_wbtest.mbt`;
/// - one of its directory components is `__tests__`, `test`, `tests`, `e2e` or
///   `spec` (this includes the first component of a relative path, such as
///   `tests/it.rs`);
/// - it contains `.test.` or `.spec.`.
fn is_test_file(file: &str) -> bool {
    const TEST_DIRS: [&str; 5] = ["__tests__", "test", "tests", "e2e", "spec"];

    let (dirs, basename) = match file.rsplit_once('/') {
        Some((dirs, basename)) => (dirs, basename),
        None => ("", file),
    };
    // Compare whole directory components so that a leading `tests/` is
    // recognised; a `"/tests/"` substring check would miss it.
    let in_test_dir = dirs.split('/').any(|dir| TEST_DIRS.contains(&dir));

    file.ends_with(".mbt.md")
        || basename.ends_with("_test.mbt")
        || basename.ends_with("_wbtest.mbt")
        || in_test_dir
        || file.contains(".test.")
        || file.contains(".spec.")
}
1083
1084fn moonbit_same_package_tests(file: &str, indexed_files: &[FileRecord]) -> Vec<String> {
1085    if is_test_file(file) || !is_moonbit_source_file(file) {
1086        return Vec::new();
1087    }
1088    let Some(package_dir) = moonbit_package_dir(file, indexed_files) else {
1089        return Vec::new();
1090    };
1091    indexed_files
1092        .iter()
1093        .filter(|record| record.language == Language::MoonBit)
1094        .filter(|record| is_test_file(&record.path))
1095        .filter(|record| {
1096            moonbit_package_dir(&record.path, indexed_files).as_deref() == Some(&package_dir)
1097        })
1098        .map(|record| record.path.clone())
1099        .collect()
1100}
1101
/// Package-level view of the indexed MoonBit files, built by `from_root`.
#[derive(Debug, Default)]
struct MoonBitPackageGraph {
    /// Packages keyed by the directory that contains their package file.
    package_by_dir: BTreeMap<String, MoonBitPackage>,
    /// For each local package name, the names of the local packages importing it.
    reverse_imports: BTreeMap<String, BTreeSet<String>>,
}
1107
/// A single MoonBit package as recorded in `MoonBitPackageGraph`.
#[derive(Debug)]
struct MoonBitPackage {
    /// Package name: the declared `name`, or one derived from the directory.
    name: String,
    /// Import entries read from the package file.
    imports: Vec<String>,
    /// Paths of the indexed MoonBit test files assigned to this package.
    tests: Vec<String>,
}
1114
1115impl MoonBitPackageGraph {
1116    fn from_root(root: &Path, indexed_files: &[FileRecord]) -> Self {
1117        let module_name = moonbit_module_name(root, indexed_files);
1118        let mut package_by_dir = BTreeMap::new();
1119
1120        for record in indexed_files {
1121            if !is_moonbit_package_file(&record.path) {
1122                continue;
1123            }
1124            let dir = parent_dir(&record.path);
1125            let source = fs::read_to_string(root.join(&record.path)).unwrap_or_default();
1126            let (name, imports) = parse_moonbit_package_metadata(&source);
1127            let package_name =
1128                name.unwrap_or_else(|| moonbit_package_name_from_dir(module_name.as_deref(), &dir));
1129            package_by_dir.insert(
1130                dir.clone(),
1131                MoonBitPackage {
1132                    name: package_name,
1133                    imports,
1134                    tests: Vec::new(),
1135                },
1136            );
1137        }
1138
1139        let package_dirs: Vec<String> = package_by_dir.keys().cloned().collect();
1140        for record in indexed_files {
1141            if record.language != Language::MoonBit || !is_test_file(&record.path) {
1142                continue;
1143            }
1144            if let Some(package_dir) = moonbit_package_dir_from_dirs(&record.path, &package_dirs) {
1145                if let Some(package) = package_by_dir.get_mut(&package_dir) {
1146                    package.tests.push(record.path.clone());
1147                }
1148            }
1149        }
1150
1151        let local_names: BTreeSet<String> = package_by_dir
1152            .values()
1153            .map(|package| package.name.clone())
1154            .collect();
1155        let mut reverse_imports: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
1156        for package in package_by_dir.values() {
1157            for import in &package.imports {
1158                if local_names.contains(import) {
1159                    reverse_imports
1160                        .entry(import.clone())
1161                        .or_default()
1162                        .insert(package.name.clone());
1163                }
1164            }
1165        }
1166
1167        Self {
1168            package_by_dir,
1169            reverse_imports,
1170        }
1171    }
1172
1173    fn dependent_package_tests(&self, file: &str) -> Vec<String> {
1174        if is_test_file(file) || !is_moonbit_source_file(file) {
1175            return Vec::new();
1176        }
1177        let Some(changed_package) = self.package_for_file(file) else {
1178            return Vec::new();
1179        };
1180
1181        let mut pending: Vec<String> = self
1182            .reverse_imports
1183            .get(&changed_package.name)
1184            .map(|deps| deps.iter().cloned().collect())
1185            .unwrap_or_default();
1186        let mut dependent_names = BTreeSet::new();
1187        while let Some(package_name) = pending.pop() {
1188            if !dependent_names.insert(package_name.clone()) {
1189                continue;
1190            }
1191            if let Some(next) = self.reverse_imports.get(&package_name) {
1192                pending.extend(next.iter().cloned());
1193            }
1194        }
1195
1196        self.package_by_dir
1197            .values()
1198            .filter(|package| dependent_names.contains(&package.name))
1199            .flat_map(|package| package.tests.clone())
1200            .collect()
1201    }
1202
1203    fn package_for_file(&self, file: &str) -> Option<&MoonBitPackage> {
1204        let package_dir = moonbit_package_dir_from_dirs(file, self.package_by_dir.keys())?;
1205        self.package_by_dir.get(&package_dir)
1206    }
1207}
1208
1209fn moonbit_module_name(root: &Path, indexed_files: &[FileRecord]) -> Option<String> {
1210    indexed_files
1211        .iter()
1212        .filter(|record| record.path.ends_with("moon.mod.json"))
1213        .min_by_key(|record| record.path.matches('/').count())
1214        .and_then(|record| fs::read_to_string(root.join(&record.path)).ok())
1215        .and_then(|source| {
1216            serde_json::from_str::<serde_json::Value>(&source)
1217                .ok()
1218                .and_then(|json| {
1219                    json.get("name")
1220                        .and_then(|value| value.as_str())
1221                        .map(str::to_string)
1222                })
1223        })
1224}
1225
1226fn parse_moonbit_package_metadata(source: &str) -> (Option<String>, Vec<String>) {
1227    let Ok(json) = serde_json::from_str::<serde_json::Value>(source) else {
1228        return (None, Vec::new());
1229    };
1230    let name = json
1231        .get("name")
1232        .and_then(|value| value.as_str())
1233        .map(str::to_string);
1234    let mut imports = Vec::new();
1235    if let Some(value) = json.get("import").or_else(|| json.get("imports")) {
1236        collect_moonbit_imports(value, &mut imports);
1237    }
1238    (name, imports)
1239}
1240
1241fn collect_moonbit_imports(value: &serde_json::Value, imports: &mut Vec<String>) {
1242    match value {
1243        serde_json::Value::String(import) => imports.push(import.clone()),
1244        serde_json::Value::Array(values) => {
1245            for value in values {
1246                collect_moonbit_imports(value, imports);
1247            }
1248        }
1249        serde_json::Value::Object(values) => {
1250            for (alias, value) in values {
1251                imports.push(value.as_str().unwrap_or(alias).to_string());
1252            }
1253        }
1254        _ => {}
1255    }
1256}
1257
/// Derives a package name from the module name and the package directory.
///
/// With a module name the result is `module` for the root directory and
/// `module/dir` otherwise. Without one it is the directory itself, or
/// `moonbit-package` for the root directory.
fn moonbit_package_name_from_dir(module_name: Option<&str>, dir: &str) -> String {
    let Some(module) = module_name else {
        let fallback = if dir.is_empty() { "moonbit-package" } else { dir };
        return fallback.to_string();
    };
    if dir.is_empty() {
        module.to_string()
    } else {
        format!("{module}/{dir}")
    }
}
1266
/// Returns `true` when `file` ends with `.mbt`, `.mbti` or `.mbt.md`.
fn is_moonbit_source_file(file: &str) -> bool {
    [".mbt", ".mbti", ".mbt.md"]
        .iter()
        .any(|suffix| file.ends_with(*suffix))
}
1270
/// Returns `true` when the file name of `file` is exactly `moon.pkg.json` or
/// `moon.pkg`.
///
/// Only the last `/`-separated component is compared, so a name that merely
/// ends with one of those strings (e.g. `notmoon.pkg.json`) is not accepted.
fn is_moonbit_package_file(file: &str) -> bool {
    let basename = file.rsplit_once('/').map_or(file, |(_, name)| name);
    matches!(basename, "moon.pkg.json" | "moon.pkg")
}
1274
1275fn moonbit_package_dir(file: &str, indexed_files: &[FileRecord]) -> Option<String> {
1276    let dirs: Vec<String> = indexed_files
1277        .iter()
1278        .filter(|record| is_moonbit_package_file(&record.path))
1279        .map(|record| parent_dir(&record.path))
1280        .collect();
1281    moonbit_package_dir_from_dirs(file, &dirs)
1282}
1283
/// Picks the longest directory in `dirs` that encloses `file`.
///
/// A directory encloses `file` when it is empty (the root), equals `file`, or
/// is followed by `/` at the start of `file`. Returns `None` when no directory
/// qualifies.
fn moonbit_package_dir_from_dirs<'a, I>(file: &str, dirs: I) -> Option<String>
where
    I: IntoIterator<Item = &'a String>,
{
    dirs.into_iter()
        .filter(|dir| {
            dir.is_empty()
                || file
                    .strip_prefix(dir.as_str())
                    .is_some_and(|rest| rest.is_empty() || rest.starts_with('/'))
        })
        // Keep the earlier candidate on equal length.
        .reduce(|longest, dir| if dir.len() > longest.len() { dir } else { longest })
        .cloned()
}
1300
/// Returns everything before the last `/` in `file`, or an empty string when
/// `file` contains no `/`.
fn parent_dir(file: &str) -> String {
    match file.rfind('/') {
        Some(slash) => file[..slash].to_owned(),
        None => String::new(),
    }
}
1306
1307fn rust_name_heuristic_tests(file: &str, indexed_files: &[FileRecord]) -> Vec<String> {
1308    let Some(changed) = indexed_files.iter().find(|record| record.path == file) else {
1309        return Vec::new();
1310    };
1311    if changed.language != Language::Rust || is_test_file(file) {
1312        return Vec::new();
1313    }
1314    let Some(stem) = file
1315        .rsplit('/')
1316        .next()
1317        .and_then(|name| name.strip_suffix(".rs"))
1318    else {
1319        return Vec::new();
1320    };
1321    if stem.len() < 3 {
1322        return Vec::new();
1323    }
1324    indexed_files
1325        .iter()
1326        .filter(|record| record.language == Language::Rust)
1327        .filter(|record| is_test_file(&record.path))
1328        .filter(|record| rust_test_path_matches_stem(&record.path, stem))
1329        .map(|record| record.path.clone())
1330        .collect()
1331}
1332
/// Checks whether the file name of `test_path` (which must end in `.rs`) names
/// `stem` as a whole `_`-delimited part.
///
/// The name matches when it equals `stem`, starts with `stem_`, ends with
/// `_stem`, or contains `_stem_`.
fn rust_test_path_matches_stem(test_path: &str, stem: &str) -> bool {
    let file_name = test_path
        .rsplit_once('/')
        .map_or(test_path, |(_, name)| name);
    let Some(name) = file_name.strip_suffix(".rs") else {
        return false;
    };
    // Padding both sides with `_` folds the four cases (equal, prefix, suffix,
    // infix) into a single substring search.
    format!("_{name}_").contains(&format!("_{stem}_"))
}
1347
1348fn rust_workspace_heuristic_tests(
1349    root: &Path,
1350    file: &str,
1351    indexed_files: &[FileRecord],
1352) -> Vec<String> {
1353    let Some(changed) = indexed_files.iter().find(|record| record.path == file) else {
1354        return Vec::new();
1355    };
1356    if changed.language != Language::Rust || is_test_file(file) {
1357        return Vec::new();
1358    }
1359    let Some(crate_root) = rust_crate_root(file) else {
1360        return Vec::new();
1361    };
1362    indexed_files
1363        .iter()
1364        .filter(|record| record.language == Language::Rust)
1365        .filter(|record| record.path != file)
1366        .filter(|record| rust_crate_root(&record.path).as_deref() == Some(crate_root.as_str()))
1367        .filter(|record| {
1368            is_test_file(&record.path) || rust_file_contains_inline_tests(root, &record.path)
1369        })
1370        .map(|record| record.path.clone())
1371        .collect()
1372}
1373
/// Guesses the crate root directory of the `/`-separated path `file`.
///
/// Paths of two or more components starting with `crates` map to
/// `crates/<second component>`. Otherwise the result is everything before the
/// first `src` component (an empty string when `src` comes first), or `None`
/// when the path has no `src` component.
fn rust_crate_root(file: &str) -> Option<String> {
    let segments: Vec<&str> = file.split('/').collect();
    match segments.as_slice() {
        ["crates", member, ..] => Some(format!("crates/{member}")),
        _ => {
            let src_at = segments.iter().position(|segment| *segment == "src")?;
            Some(segments[..src_at].join("/"))
        }
    }
}
1384
1385fn rust_file_contains_inline_tests(root: &Path, file: &str) -> bool {
1386    fs::read_to_string(root.join(file))
1387        .map(|text| text.contains("#[cfg(test)]") || text.contains("#[test]"))
1388        .unwrap_or(false)
1389}