1pub mod config;
2pub mod db;
3pub mod extraction;
4pub mod graph;
5pub mod installer;
6pub mod mcp;
7pub mod types;
8pub mod watcher;
9
10use anyhow::{anyhow, Context, Result};
11use config::{load_config, save_config, CodeGraphConfig};
12use db::Database;
13use extraction::{detect_language, detect_parse_error, extract_from_source, should_include_file};
14use graph::{GraphTraverser, Subgraph};
15use sha2::{Digest, Sha256};
16use std::collections::{BTreeMap, BTreeSet};
17use std::fs;
18use std::path::{Path, PathBuf};
19use types::{
20 AffectedDebugEntry, AffectedMatchSources, AffectedReport, ContextFileSummary, ContextMatch,
21 ContextReport, ContextSymbolSummary, EdgeKind, ExploreRelationship, ExploreReport,
22 ExploreSourceFile, ExploreSourceSection, FileLanguageGroup, FileListEntry, FileListFormat,
23 FileListOptions, FileListReport, FileRecord, FileTreeEntry, GraphPath, GraphStats, IndexError,
24 IndexErrorCategory, IndexResult, Language, Node, NodeEdge, SearchOptions, SearchResult,
25};
26
/// Name of the directory, created directly under the project root, that contains the database.
pub const CODEGRAPH_DIR: &str = ".codegraph";
/// File name of the index database stored inside [`CODEGRAPH_DIR`].
pub const DATABASE_FILE: &str = "codegraph.db";
29
30pub struct CodeGraph {
31 root: PathBuf,
32 config: CodeGraphConfig,
33 db: Database,
34}
35
36impl CodeGraph {
37 pub fn init(root: impl AsRef<Path>) -> Result<Self> {
38 let root = root
39 .as_ref()
40 .canonicalize()
41 .unwrap_or_else(|_| root.as_ref().to_path_buf());
42 let dir = root.join(CODEGRAPH_DIR);
43 if dir.exists() {
44 return Err(anyhow!(
45 "CodeGraph already initialized in {}",
46 root.display()
47 ));
48 }
49 fs::create_dir_all(&dir).with_context(|| format!("creating {}", dir.display()))?;
50 let config = CodeGraphConfig::default_for_root(".");
51 save_config(&root, &config)?;
52 let db = Database::initialize(dir.join(DATABASE_FILE))?;
53 Ok(Self { root, config, db })
54 }
55
56 pub fn open(root: impl AsRef<Path>) -> Result<Self> {
57 let root = find_nearest_codegraph_root(root.as_ref())
58 .ok_or_else(|| anyhow!("CodeGraph not initialized in {}", root.as_ref().display()))?;
59 let config = load_config(&root)?;
60 let db = Database::open(root.join(CODEGRAPH_DIR).join(DATABASE_FILE))?;
61 Ok(Self { root, config, db })
62 }
63
64 pub fn root(&self) -> &Path {
65 &self.root
66 }
67
68 pub fn config(&self) -> &CodeGraphConfig {
69 &self.config
70 }
71
72 pub fn index_all(&mut self) -> Result<IndexResult> {
73 let start = std::time::Instant::now();
74 self.db.clear_all()?;
75 let files = self.scan_files()?;
76 let mut result = IndexResult::default();
77
78 for path in files {
79 self.index_changed_file(&path, &mut result)?;
80 }
81
82 self.db.clear_resolved_reference_edges()?;
83 self.db.resolve_references(&self.root)?;
84 result.edges_created = self.db.edge_count()?;
85 result.success = result.files_errored == 0;
86 result.duration_ms = start.elapsed().as_millis() as i64;
87 Ok(result)
88 }
89
90 pub fn sync(&mut self) -> Result<IndexResult> {
91 let start = std::time::Instant::now();
92 let files = self.scan_files()?;
93 let current_paths = files
94 .iter()
95 .map(|path| normalized_path(path))
96 .collect::<BTreeSet<_>>();
97 let existing = self
98 .db
99 .get_all_files()?
100 .into_iter()
101 .map(|file| (file.path.clone(), file))
102 .collect::<BTreeMap<_, _>>();
103 let mut result = IndexResult::default();
104 let mut changed = false;
105
106 for path in existing.keys() {
107 if !current_paths.contains(path) {
108 self.db.delete_file_index(path)?;
109 result.files_deleted += 1;
110 changed = true;
111 }
112 }
113
114 for path in files {
115 let path_key = normalized_path(&path);
116 let full = self.root.join(&path);
117 let content = match fs::read_to_string(&full) {
118 Ok(content) => content,
119 Err(err) => {
120 push_index_error(
121 &mut result,
122 categorize_read_error(&err),
123 &path,
124 err.to_string(),
125 );
126 continue;
127 }
128 };
129 let hash = content_hash(&content);
130 if existing
131 .get(&path_key)
132 .is_some_and(|file| file.content_hash == hash)
133 {
134 result.files_skipped += 1;
135 continue;
136 }
137 self.index_changed_file_with_content(&path, content, Some(hash), &mut result)?;
138 changed = true;
139 }
140
141 if changed {
142 self.db.clear_resolved_reference_edges()?;
143 self.db.resolve_references(&self.root)?;
144 }
145 result.edges_created = self.db.edge_count()?;
146 result.success = result.files_errored == 0;
147 result.duration_ms = start.elapsed().as_millis() as i64;
148 Ok(result)
149 }
150
151 pub fn stats(&self) -> Result<GraphStats> {
152 self.db.stats()
153 }
154
155 pub fn search_nodes(&self, query: &str, options: SearchOptions) -> Result<Vec<SearchResult>> {
156 self.db.search_nodes(query, options)
157 }
158
159 pub fn get_node(&self, id: &str) -> Result<Option<Node>> {
160 self.db.get_node(id)
161 }
162
163 pub fn get_callers(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
164 GraphTraverser::new(&self.db).get_callers(node_id, max_depth)
165 }
166
167 pub fn get_callees(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
168 GraphTraverser::new(&self.db).get_callees(node_id, max_depth)
169 }
170
171 pub fn get_impact_radius(&self, node_id: &str, max_depth: usize) -> Result<Subgraph> {
172 GraphTraverser::new(&self.db).get_impact_radius(node_id, max_depth)
173 }
174
175 pub fn find_paths(
176 &self,
177 from_node_id: &str,
178 to_node_id: &str,
179 max_depth: usize,
180 max_paths: usize,
181 ) -> Result<Vec<GraphPath>> {
182 GraphTraverser::new(&self.db).find_paths(from_node_id, to_node_id, max_depth, max_paths)
183 }
184
185 pub fn get_file_dependents(&self, file_path: &str) -> Result<Vec<String>> {
186 self.db.get_file_dependents(file_path)
187 }
188
189 pub fn get_all_files(&self) -> Result<Vec<FileRecord>> {
190 self.db.get_all_files()
191 }
192
193 pub fn list_files(&self, options: FileListOptions) -> Result<FileListReport> {
194 let max_depth = options.max_depth.map(|depth| depth.clamp(1, 20));
195 let mut files = self
196 .get_all_files()?
197 .into_iter()
198 .filter(|file| {
199 options
200 .path_filter
201 .as_deref()
202 .map(|path| file_path_matches_filter(path, &file.path))
203 .unwrap_or(true)
204 })
205 .filter(|file| {
206 options
207 .pattern
208 .as_deref()
209 .map(|pattern| file_pattern_matches(pattern, &file.path))
210 .unwrap_or(true)
211 })
212 .filter(|file| {
213 max_depth
214 .map(|depth| file.path.split('/').count() <= depth)
215 .unwrap_or(true)
216 })
217 .map(|file| file_list_entry(file, options.include_metadata))
218 .collect::<Vec<_>>();
219 files.sort_by(|a, b| a.path.cmp(&b.path));
220
221 let groups = if options.format == FileListFormat::Grouped {
222 grouped_file_entries(&files)
223 } else {
224 Vec::new()
225 };
226 let tree = if options.format == FileListFormat::Tree {
227 build_file_tree(&files)
228 } else {
229 Vec::new()
230 };
231 let format = match options.format {
232 FileListFormat::Grouped => "grouped",
233 FileListFormat::Flat => "flat",
234 FileListFormat::Tree => "tree",
235 }
236 .to_string();
237
238 Ok(FileListReport {
239 format,
240 path_filter: options.path_filter,
241 pattern: options.pattern,
242 include_metadata: options.include_metadata,
243 max_depth,
244 total_files: files.len(),
245 files,
246 groups,
247 tree,
248 })
249 }
250
251 pub fn build_affected_report(&self, files: &[String]) -> Result<AffectedReport> {
252 let indexed_files = self.get_all_files()?;
253 let moonbit_packages = MoonBitPackageGraph::from_root(&self.root, &indexed_files);
254 let mut affected = BTreeSet::new();
255 let mut debug = Vec::new();
256 let mut warnings = Vec::new();
257
258 for file in files {
259 if is_test_file(file) {
260 affected.insert(file.clone());
261 debug.push(AffectedDebugEntry {
262 changed_file: file.clone(),
263 reason: "changed file is a test file".to_string(),
264 matched_tests: vec![file.clone()],
265 matched_by: AffectedMatchSources {
266 direct_test_input: vec![file.clone()],
267 import_dependents: Vec::new(),
268 moonbit_same_package: Vec::new(),
269 moonbit_package_dependents: Vec::new(),
270 rust_name_heuristic: Vec::new(),
271 rust_workspace_heuristic: Vec::new(),
272 },
273 });
274 continue;
275 }
276
277 let mut matched = BTreeSet::new();
278 let mut import_dependents = BTreeSet::new();
279 for dep in self.get_file_dependents(file)? {
280 if is_test_file(&dep) {
281 import_dependents.insert(dep.clone());
282 matched.insert(dep.clone());
283 affected.insert(dep);
284 }
285 }
286
287 let moonbit_tests: BTreeSet<String> = moonbit_same_package_tests(file, &indexed_files)
288 .into_iter()
289 .collect();
290 for test in &moonbit_tests {
291 matched.insert(test.clone());
292 affected.insert(test.clone());
293 }
294 let moonbit_package_tests: BTreeSet<String> = moonbit_packages
295 .dependent_package_tests(file)
296 .into_iter()
297 .collect();
298 for test in &moonbit_package_tests {
299 matched.insert(test.clone());
300 affected.insert(test.clone());
301 }
302 let rust_tests: BTreeSet<String> = rust_name_heuristic_tests(file, &indexed_files)
303 .into_iter()
304 .collect();
305 for test in &rust_tests {
306 matched.insert(test.clone());
307 affected.insert(test.clone());
308 }
309 let rust_workspace_tests: BTreeSet<String> =
310 rust_workspace_heuristic_tests(&self.root, file, &indexed_files)
311 .into_iter()
312 .collect();
313 for test in &rust_workspace_tests {
314 matched.insert(test.clone());
315 affected.insert(test.clone());
316 }
317
318 if matched.is_empty() {
319 warnings.push(format!(
320 "{file}: no import-dependent tests, MoonBit same-package tests, MoonBit package-dependent tests, Rust name-heuristic tests, or Rust workspace tests found"
321 ));
322 }
323 debug.push(AffectedDebugEntry {
324 changed_file: file.clone(),
325 reason: if matched.is_empty() {
326 "no import-dependent tests, MoonBit same-package tests, MoonBit package-dependent tests, Rust name-heuristic tests, or Rust workspace tests found".to_string()
327 } else {
328 "matched import-dependent tests, MoonBit same-package tests, MoonBit package-dependent tests, Rust name-heuristic tests, and/or Rust workspace tests".to_string()
329 },
330 matched_tests: matched.into_iter().collect(),
331 matched_by: AffectedMatchSources {
332 direct_test_input: Vec::new(),
333 import_dependents: import_dependents.into_iter().collect(),
334 moonbit_same_package: moonbit_tests.into_iter().collect(),
335 moonbit_package_dependents: moonbit_package_tests.into_iter().collect(),
336 rust_name_heuristic: rust_tests.into_iter().collect(),
337 rust_workspace_heuristic: rust_workspace_tests.into_iter().collect(),
338 },
339 });
340 }
341
342 Ok(AffectedReport {
343 changed_files: files.to_vec(),
344 affected_tests: affected.into_iter().collect(),
345 debug,
346 warnings,
347 })
348 }
349
350 pub fn build_context(&self, task: &str, max_nodes: i64, include_code: bool) -> Result<String> {
351 let report = self.build_context_report(task, max_nodes, include_code)?;
352 let mut out = format!("## Context: {task}\n\n");
353 if report.matches.is_empty() {
354 for warning in &report.warnings {
355 out.push_str(warning);
356 out.push('\n');
357 }
358 return Ok(out);
359 }
360
361 for result in report.matches {
362 let n = result.node;
363 out.push_str(&format!(
364 "- `{}` `{}` at `{}:{}`",
365 n.kind, n.name, n.file_path, n.start_line
366 ));
367 if let Some(sig) = n.signature.as_deref() {
368 out.push_str(&format!(" — `{}`", sig.replace('\n', " ")));
369 }
370 out.push('\n');
371 if let Some(code) = result.code {
372 out.push_str("\n```");
373 out.push_str(n.language.as_str());
374 out.push('\n');
375 out.push_str(&code);
376 if !code.ends_with('\n') {
377 out.push('\n');
378 }
379 out.push_str("```\n\n");
380 }
381 }
382 Ok(out)
383 }
384
385 pub fn build_context_report(
386 &self,
387 task: &str,
388 max_nodes: i64,
389 include_code: bool,
390 ) -> Result<ContextReport> {
391 let query = task.trim().to_string();
392 let search_terms = context_search_terms(task);
393 let results = self.find_context_nodes(&search_terms, max_nodes)?;
394 let mut matches = Vec::new();
395 let mut files: BTreeMap<String, ContextFileSummary> = BTreeMap::new();
396 let mut symbols = Vec::new();
397
398 for (result, search_term) in results {
399 let code = if include_code {
400 self.read_node_source(&result.node).ok()
401 } else {
402 None
403 };
404 let file = files
405 .entry(result.node.file_path.clone())
406 .or_insert_with(|| ContextFileSummary {
407 path: result.node.file_path.clone(),
408 language: result.node.language,
409 match_count: 0,
410 symbols: Vec::new(),
411 });
412 file.match_count += 1;
413 if !file.symbols.iter().any(|name| name == &result.node.name) {
414 file.symbols.push(result.node.name.clone());
415 }
416 symbols.push(ContextSymbolSummary {
417 name: result.node.name.clone(),
418 kind: result.node.kind,
419 file_path: result.node.file_path.clone(),
420 start_line: result.node.start_line,
421 });
422 matches.push(ContextMatch {
423 reason: context_match_reason(task, &search_term),
424 search_term,
425 score: result.score,
426 node: result.node,
427 code,
428 });
429 }
430
431 let mut warnings = Vec::new();
432 if matches.is_empty() {
433 warnings.push("No matching symbols or files were found.".to_string());
434 warnings.push(
435 "Try a concrete symbol name, file name, package/module name, or a shorter code term. For candidate discovery, run `cgz query --json <term>`."
436 .to_string(),
437 );
438 }
439
440 Ok(ContextReport {
441 query,
442 search_terms,
443 matches,
444 files: files.into_values().collect(),
445 symbols,
446 warnings,
447 })
448 }
449
450 pub fn build_explore_report(&self, query: &str, max_files: usize) -> Result<ExploreReport> {
451 let max_files = max_files.clamp(1, 20);
452 let stats = self.stats()?;
453 let max_nodes = (max_files as i64 * 6).clamp(6, 120);
454 let context = self.build_context_report(query, max_nodes, true)?;
455 let mut source_files: BTreeMap<String, ExploreSourceFile> = BTreeMap::new();
456 let mut relationships = Vec::new();
457 let mut additional_files = BTreeSet::new();
458 let mut seen_relationships = BTreeSet::new();
459 let mut truncated = false;
460 let mut warnings = context.warnings.clone();
461
462 for matched in &context.matches {
463 let file = source_files
464 .entry(matched.node.file_path.clone())
465 .or_insert_with(|| ExploreSourceFile {
466 path: matched.node.file_path.clone(),
467 language: matched.node.language,
468 sections: Vec::new(),
469 });
470 if file.sections.len() < 4 {
471 let (code, section_truncated) =
472 bounded_source_section(matched.code.as_deref().unwrap_or_default(), 4_000);
473 truncated |= section_truncated;
474 file.sections.push(ExploreSourceSection {
475 symbol: matched.node.name.clone(),
476 kind: matched.node.kind,
477 start_line: matched.node.start_line,
478 end_line: matched.node.end_line,
479 reason: matched.reason.clone(),
480 code,
481 truncated: section_truncated,
482 });
483 } else {
484 truncated = true;
485 }
486
487 self.collect_explore_relationships(
488 &matched.node,
489 &mut relationships,
490 &mut seen_relationships,
491 &mut additional_files,
492 )?;
493 }
494
495 let mut source_files = source_files.into_values().collect::<Vec<_>>();
496 source_files.sort_by(|a, b| a.path.cmp(&b.path));
497 if source_files.len() > max_files {
498 for file in source_files.drain(max_files..) {
499 additional_files.insert(file.path);
500 }
501 truncated = true;
502 }
503
504 let source_paths = source_files
505 .iter()
506 .map(|file| file.path.as_str())
507 .collect::<BTreeSet<_>>();
508 let additional_files = additional_files
509 .into_iter()
510 .filter(|file| !source_paths.contains(file.as_str()))
511 .take(max_files)
512 .collect::<Vec<_>>();
513
514 relationships.sort_by(|a, b| {
515 a.file_path
516 .cmp(&b.file_path)
517 .then_with(|| a.source.cmp(&b.source))
518 .then_with(|| a.kind.as_str().cmp(b.kind.as_str()))
519 .then_with(|| a.target.cmp(&b.target))
520 });
521 if relationships.len() > max_files * 4 {
522 relationships.truncate(max_files * 4);
523 truncated = true;
524 }
525
526 if truncated {
527 warnings.push("Explore output was truncated to fit the configured source and relationship budgets.".to_string());
528 }
529
530 Ok(ExploreReport {
531 query: context.query,
532 max_files,
533 budget_guidance: explore_budget_guidance(stats.file_count),
534 source_files,
535 relationships,
536 additional_files,
537 warnings,
538 truncated,
539 truncated_reason: truncated.then(|| {
540 "Some source sections, files, or relationships exceeded the explore budget."
541 .to_string()
542 }),
543 })
544 }
545
546 fn collect_explore_relationships(
547 &self,
548 node: &Node,
549 relationships: &mut Vec<ExploreRelationship>,
550 seen: &mut BTreeSet<String>,
551 additional_files: &mut BTreeSet<String>,
552 ) -> Result<()> {
553 for edge in self.get_callees(&node.id, 1)?.into_iter().take(4) {
554 if edge.edge.kind != EdgeKind::Contains {
555 push_explore_relationship(
556 node,
557 edge,
558 "outgoing",
559 relationships,
560 seen,
561 additional_files,
562 );
563 }
564 }
565 for edge in self.get_callers(&node.id, 1)?.into_iter().take(4) {
566 if edge.edge.kind != EdgeKind::Contains {
567 push_explore_relationship(
568 node,
569 edge,
570 "incoming",
571 relationships,
572 seen,
573 additional_files,
574 );
575 }
576 }
577 for file in self
578 .get_file_dependents(&node.file_path)?
579 .into_iter()
580 .take(4)
581 {
582 additional_files.insert(file);
583 }
584 Ok(())
585 }
586
587 fn find_context_nodes(
588 &self,
589 search_terms: &[String],
590 max_nodes: i64,
591 ) -> Result<Vec<(SearchResult, String)>> {
592 let limit = max_nodes.max(1);
593 let mut out = Vec::new();
594 let mut seen = BTreeSet::new();
595
596 for term in search_terms {
597 if out.len() >= limit as usize {
598 break;
599 }
600 let remaining = limit - out.len() as i64;
601 let results = self.search_nodes(
602 term,
603 SearchOptions {
604 limit: remaining,
605 ..Default::default()
606 },
607 )?;
608 for result in results {
609 if seen.insert(result.node.id.clone()) {
610 out.push((result, term.clone()));
611 if out.len() >= limit as usize {
612 break;
613 }
614 }
615 }
616 }
617
618 Ok(out)
619 }
620
621 pub fn read_node_source(&self, node: &Node) -> Result<String> {
622 let full = self.root.join(&node.file_path);
623 let text =
624 fs::read_to_string(&full).with_context(|| format!("reading {}", full.display()))?;
625 let lines: Vec<&str> = text.lines().collect();
626 let start = (node.start_line.saturating_sub(1) as usize).min(lines.len());
627 let end = (node.end_line.max(node.start_line) as usize).min(lines.len());
628 Ok(lines[start..end].join("\n"))
629 }
630
631 pub fn close(self) {}
632
633 fn index_changed_file(&self, path: &Path, result: &mut IndexResult) -> Result<()> {
634 let full = self.root.join(path);
635 let content = match fs::read_to_string(&full) {
636 Ok(content) => content,
637 Err(err) => {
638 push_index_error(result, categorize_read_error(&err), path, err.to_string());
639 return Ok(());
640 }
641 };
642 self.index_changed_file_with_content(path, content, None, result)
643 }
644
645 fn index_changed_file_with_content(
646 &self,
647 path: &Path,
648 content: String,
649 hash: Option<String>,
650 result: &mut IndexResult,
651 ) -> Result<()> {
652 let path_key = normalized_path(path);
653 let lang = detect_language(path, &content);
654 if lang.is_unknown() {
655 self.db.delete_file_index(&path_key)?;
656 push_index_error(
657 result,
658 IndexErrorCategory::Unsupported,
659 path,
660 "unsupported file type".to_string(),
661 );
662 return Ok(());
663 }
664 if detect_parse_error(&content, lang) {
665 self.db.delete_file_index(&path_key)?;
666 push_index_error(
667 result,
668 IndexErrorCategory::Parse,
669 path,
670 format!("could not parse {lang} syntax"),
671 );
672 return Ok(());
673 }
674 let full = self.root.join(path);
675 let metadata = fs::metadata(&full)?;
676 let extraction = extract_from_source(path, &content, lang);
677 let file = FileRecord {
678 path: path_key,
679 content_hash: hash.unwrap_or_else(|| content_hash(&content)),
680 language: lang,
681 size: metadata.len(),
682 modified_at: metadata
683 .modified()
684 .ok()
685 .and_then(system_time_ms)
686 .unwrap_or_default(),
687 indexed_at: now_ms(),
688 node_count: extraction.nodes.len() as i64,
689 };
690 self.db.replace_file_index(
691 &file,
692 &extraction.nodes,
693 &extraction.edges,
694 &extraction.unresolved_references,
695 )?;
696 result.files_indexed += 1;
697 result.nodes_created += extraction.nodes.len() as i64;
698 result.edges_created += extraction.edges.len() as i64;
699 Ok(())
700 }
701
702 fn scan_files(&self) -> Result<Vec<PathBuf>> {
703 let mut out = Vec::new();
704 let walker = ignore::WalkBuilder::new(&self.root)
705 .hidden(false)
706 .git_ignore(true)
707 .git_global(true)
708 .git_exclude(true)
709 .build();
710 for entry in walker {
711 let entry = entry?;
712 if !entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
713 continue;
714 }
715 let rel = entry
716 .path()
717 .strip_prefix(&self.root)
718 .unwrap_or(entry.path())
719 .to_path_buf();
720 if rel.components().any(|c| c.as_os_str() == CODEGRAPH_DIR) {
721 continue;
722 }
723 if should_include_file(&rel, &self.config) {
724 out.push(rel);
725 }
726 }
727 out.sort();
728 Ok(out)
729 }
730}
731
732fn categorize_read_error(err: &std::io::Error) -> IndexErrorCategory {
733 if err.kind() == std::io::ErrorKind::WouldBlock {
734 IndexErrorCategory::Lock
735 } else {
736 IndexErrorCategory::Read
737 }
738}
739
740fn push_index_error(
741 result: &mut IndexResult,
742 category: IndexErrorCategory,
743 path: &Path,
744 message: String,
745) {
746 result.files_errored += 1;
747 result.errors.push(IndexError {
748 category,
749 path: path.display().to_string(),
750 message,
751 });
752}
753
754fn file_list_entry(file: FileRecord, include_metadata: bool) -> FileListEntry {
755 FileListEntry {
756 path: file.path,
757 language: file.language,
758 node_count: file.node_count,
759 size: include_metadata.then_some(file.size),
760 modified_at: include_metadata.then_some(file.modified_at),
761 indexed_at: include_metadata.then_some(file.indexed_at),
762 }
763}
764
/// Renders `path` as a string with every backslash replaced by a forward slash.
///
/// Non-UTF-8 parts are converted lossily.
fn normalized_path(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|c| if c == '\\' { '/' } else { c })
        .collect()
}
768
/// Returns whether `path` is selected by the directory/file `filter`.
///
/// Leading and trailing slashes of the filter are ignored. An empty filter matches
/// everything; otherwise `path` must equal the filter or lie beneath it.
fn file_path_matches_filter(filter: &str, path: &str) -> bool {
    let prefix = filter.trim_matches('/');
    if prefix.is_empty() || path == prefix {
        return true;
    }
    path.strip_prefix(prefix)
        .is_some_and(|rest| rest.starts_with('/'))
}
773
774fn grouped_file_entries(files: &[FileListEntry]) -> Vec<FileLanguageGroup> {
775 let mut grouped: BTreeMap<String, (Language, Vec<FileListEntry>)> = BTreeMap::new();
776 for file in files {
777 grouped
778 .entry(file.language.as_str().to_string())
779 .or_insert_with(|| (file.language, Vec::new()))
780 .1
781 .push(file.clone());
782 }
783 grouped
784 .into_values()
785 .map(|(language, files)| FileLanguageGroup {
786 language,
787 count: files.len(),
788 files,
789 })
790 .collect()
791}
792
793fn build_file_tree(files: &[FileListEntry]) -> Vec<FileTreeEntry> {
794 let mut roots = Vec::new();
795 for file in files {
796 insert_tree_file(
797 &mut roots,
798 file,
799 &file.path.split('/').collect::<Vec<_>>(),
800 0,
801 "",
802 );
803 }
804 roots
805}
806
807fn insert_tree_file(
808 entries: &mut Vec<FileTreeEntry>,
809 file: &FileListEntry,
810 parts: &[&str],
811 index: usize,
812 parent: &str,
813) {
814 let Some(name) = parts.get(index) else {
815 return;
816 };
817 let path = if parent.is_empty() {
818 (*name).to_string()
819 } else {
820 format!("{parent}/{name}")
821 };
822 let is_file = index + 1 == parts.len();
823 let pos = entries
824 .iter()
825 .position(|entry| entry.name == *name && entry.kind == if is_file { "file" } else { "dir" })
826 .unwrap_or_else(|| {
827 entries.push(FileTreeEntry {
828 name: (*name).to_string(),
829 path: path.clone(),
830 kind: if is_file { "file" } else { "dir" }.to_string(),
831 language: is_file.then_some(file.language),
832 node_count: is_file.then_some(file.node_count),
833 size: file.size.filter(|_| is_file),
834 children: Vec::new(),
835 });
836 entries.len() - 1
837 });
838 if !is_file {
839 insert_tree_file(&mut entries[pos].children, file, parts, index + 1, &path);
840 }
841 entries.sort_by(|a, b| {
842 a.kind
843 .cmp(&b.kind)
844 .then_with(|| a.name.cmp(&b.name))
845 .then_with(|| a.path.cmp(&b.path))
846 });
847}
848
/// Returns whether `path` matches the simple file `pattern`.
///
/// Rules, in order:
/// - an empty pattern matches everything;
/// - `*.ext` and `**/*.ext` with no further wildcard in `ext` match any path ending in `.ext`;
/// - any other pattern containing `*` is a glob in which `*` matches any run of characters
///   (including `/`), anchored at both ends;
/// - a pattern without `*` matches when it occurs anywhere in `path`.
///
/// Extension patterns whose remainder still contains a wildcard (for example `*.test.*`) are
/// handled by the general glob instead of being compared as a literal suffix. For the
/// `**/*.…` form the leading `**/` is dropped first, so files at the root match too, as they
/// do for plain `**/*.ext`.
fn file_pattern_matches(pattern: &str, path: &str) -> bool {
    if pattern.is_empty() {
        return true;
    }

    // `glob` is what the general matcher sees; `ext` is the extension-shortcut candidate.
    let (glob, ext) = match pattern.strip_prefix("**/*.") {
        Some(ext) => (pattern.strip_prefix("**/").unwrap_or(pattern), Some(ext)),
        None => (pattern, pattern.strip_prefix("*.")),
    };
    if let Some(ext) = ext.filter(|ext| !ext.contains('*')) {
        return path.ends_with(&format!(".{ext}"));
    }

    if glob.contains('*') {
        let parts: Vec<&str> = glob.split('*').collect();
        let mut rest = path;
        for (idx, part) in parts.iter().enumerate() {
            if part.is_empty() {
                continue;
            }
            // A literal before the first `*` must sit at the very start of the path.
            if idx == 0 && !rest.starts_with(part) {
                return false;
            }
            let Some(found) = rest.find(part) else {
                return false;
            };
            rest = &rest[found + part.len()..];
        }
        // A literal after the last `*` must sit at the very end of the path.
        return glob.ends_with('*') || parts.last().is_some_and(|suffix| path.ends_with(suffix));
    }

    path.contains(pattern)
}
878
/// Limits `source` to at most `max_chars` characters.
///
/// Returns the text and whether it was cut. A cut text has a
/// `// [section truncated]` marker line appended.
fn bounded_source_section(source: &str, max_chars: usize) -> (String, bool) {
    // The character at position `max_chars` exists only when the text is too long; its byte
    // offset is exactly where the kept prefix ends.
    match source.char_indices().nth(max_chars) {
        None => (source.to_string(), false),
        Some((cut, _)) => {
            let mut clipped = source[..cut].to_string();
            clipped.push_str("\n// [section truncated]");
            (clipped, true)
        }
    }
}
887
888fn push_explore_relationship(
889 root: &Node,
890 edge: NodeEdge,
891 direction: &str,
892 relationships: &mut Vec<ExploreRelationship>,
893 seen: &mut BTreeSet<String>,
894 additional_files: &mut BTreeSet<String>,
895) {
896 let key = format!(
897 "{}:{}:{}",
898 edge.edge.source,
899 edge.edge.kind.as_str(),
900 edge.edge.target
901 );
902 if !seen.insert(key) {
903 return;
904 }
905 if edge.node.file_path != root.file_path {
906 additional_files.insert(edge.node.file_path.clone());
907 }
908 let (source, target) = if direction == "outgoing" {
909 (root.name.clone(), edge.node.name.clone())
910 } else {
911 (edge.node.name.clone(), root.name.clone())
912 };
913 relationships.push(ExploreRelationship {
914 source,
915 target,
916 kind: edge.edge.kind,
917 file_path: edge.node.file_path,
918 direction: direction.to_string(),
919 });
920}
921
/// Returns usage guidance scaled to the number of indexed files.
///
/// 0–50 files count as small and 51–250 as medium. Every other value gets the large-project
/// text.
fn explore_budget_guidance(file_count: i64) -> String {
    let guidance = if (0..=50).contains(&file_count) {
        "Small project: one or two focused explore calls should usually be enough."
    } else if (51..=250).contains(&file_count) {
        "Medium project: use a few targeted explore calls around concrete symbols or files."
    } else {
        "Large project: keep explore calls narrow and follow up by file, symbol, or subsystem."
    };
    guidance.to_string()
}
934
935fn context_search_terms(task: &str) -> Vec<String> {
936 let mut terms = Vec::new();
937 let mut seen = BTreeSet::new();
938 push_context_term(task.trim(), &mut terms, &mut seen);
939
940 for raw in task.split(|c: char| {
941 !(c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '/' || c == '.' || c == ':')
942 }) {
943 let term = raw.trim_matches(|c: char| {
944 !(c.is_ascii_alphanumeric() || c == '_' || c == '/' || c == '.' || c == ':')
945 });
946 if is_useful_context_term(term) {
947 push_context_term(term, &mut terms, &mut seen);
948 }
949 }
950
951 terms
952}
953
/// Explains why a node matched: either via the whole (trimmed) task text, compared
/// ASCII-case-insensitively, or via one extracted term.
fn context_match_reason(task: &str, search_term: &str) -> String {
    let is_full_query = task.trim().eq_ignore_ascii_case(search_term);
    if !is_full_query {
        return format!("matched extracted task term `{search_term}`");
    }
    "matched the full context query".to_string()
}
961
/// Appends `term` to `terms` unless it is empty or an ASCII-case-insensitive duplicate of a
/// term recorded in `seen`.
fn push_context_term(term: &str, terms: &mut Vec<String>, seen: &mut BTreeSet<String>) {
    // `seen` holds the lowercased form; `terms` keeps the original spelling.
    let is_new = !term.is_empty() && seen.insert(term.to_ascii_lowercase());
    if is_new {
        terms.push(term.to_owned());
    }
}
971
972fn is_useful_context_term(term: &str) -> bool {
973 if term.len() < 3 {
974 return false;
975 }
976 if CONTEXT_STOP_WORDS.contains(&term.to_ascii_lowercase().as_str()) {
977 return false;
978 }
979 term.contains('_')
980 || term.contains('/')
981 || term.contains('.')
982 || term.contains(':')
983 || term.chars().any(|c| c.is_ascii_digit())
984 || term.chars().any(|c| c.is_ascii_uppercase())
985 || term.len() >= 5
986}
987
/// Lowercase words that are never used as search terms on their own.
#[rustfmt::skip]
const CONTEXT_STOP_WORDS: &[&str] = &[
    "about", "after", "before", "build", "change", "check", "code", "context",
    "debug", "error", "feature", "files", "fix", "from", "handle", "how",
    "implement", "implemented", "invalid", "is", "issue", "order", "query", "return",
    "should", "task", "test", "tests", "update", "valid", "validation", "what",
    "when", "where", "which", "who", "why", "with",
];
1028
1029pub fn is_initialized(root: impl AsRef<Path>) -> bool {
1030 root.as_ref()
1031 .join(CODEGRAPH_DIR)
1032 .join(DATABASE_FILE)
1033 .exists()
1034}
1035
1036pub fn find_nearest_codegraph_root(start: impl AsRef<Path>) -> Option<PathBuf> {
1037 let mut cur = start
1038 .as_ref()
1039 .canonicalize()
1040 .unwrap_or_else(|_| start.as_ref().to_path_buf());
1041 if cur.is_file() {
1042 cur.pop();
1043 }
1044 loop {
1045 if is_initialized(&cur) {
1046 return Some(cur);
1047 }
1048 if !cur.pop() {
1049 return None;
1050 }
1051 }
1052}
1053
1054fn content_hash(content: &str) -> String {
1055 let mut h = Sha256::new();
1056 h.update(content.as_bytes());
1057 format!("{:x}", h.finalize())
1058}
1059
1060fn now_ms() -> i64 {
1061 system_time_ms(std::time::SystemTime::now()).unwrap_or_default()
1062}
1063
/// Converts `t` to whole milliseconds since the Unix epoch; `None` when `t` is before the
/// epoch.
fn system_time_ms(t: std::time::SystemTime) -> Option<i64> {
    match t.duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => Some(elapsed.as_millis() as i64),
        Err(_) => None,
    }
}
1069
/// Returns whether `file` (a `/`-separated path) looks like a test file.
///
/// A path counts as a test when any of these hold:
/// - it ends in `.mbt.md`, or its file name ends in `_test.mbt` or `_wbtest.mbt`;
/// - any of its directories is named `__tests__`, `test`, `tests`, `e2e` or `spec`;
/// - it contains `.test.` or `.spec.`.
///
/// Directory names are compared per path segment, so a test directory at the very start of
/// a relative path (`tests/integration.rs`) is recognized, not only ones preceded by `/`.
fn is_test_file(file: &str) -> bool {
    const TEST_DIRS: [&str; 5] = ["__tests__", "test", "tests", "e2e", "spec"];

    let (dirs, basename) = match file.rsplit_once('/') {
        Some((dirs, basename)) => (Some(dirs), basename),
        None => (None, file),
    };
    // Only directory segments are checked; a file that is itself named `tests` is not a test.
    let in_test_dir =
        dirs.is_some_and(|dirs| dirs.split('/').any(|dir| TEST_DIRS.contains(&dir)));

    file.ends_with(".mbt.md")
        || basename.ends_with("_test.mbt")
        || basename.ends_with("_wbtest.mbt")
        || in_test_dir
        || file.contains(".test.")
        || file.contains(".spec.")
}
1083
1084fn moonbit_same_package_tests(file: &str, indexed_files: &[FileRecord]) -> Vec<String> {
1085 if is_test_file(file) || !is_moonbit_source_file(file) {
1086 return Vec::new();
1087 }
1088 let Some(package_dir) = moonbit_package_dir(file, indexed_files) else {
1089 return Vec::new();
1090 };
1091 indexed_files
1092 .iter()
1093 .filter(|record| record.language == Language::MoonBit)
1094 .filter(|record| is_test_file(&record.path))
1095 .filter(|record| {
1096 moonbit_package_dir(&record.path, indexed_files).as_deref() == Some(&package_dir)
1097 })
1098 .map(|record| record.path.clone())
1099 .collect()
1100}
1101
1102#[derive(Debug, Default)]
1103struct MoonBitPackageGraph {
1104 package_by_dir: BTreeMap<String, MoonBitPackage>,
1105 reverse_imports: BTreeMap<String, BTreeSet<String>>,
1106}
1107
/// One MoonBit package as seen by the affected-test analysis.
#[derive(Debug)]
struct MoonBitPackage {
    /// Package name: parsed from the package file, or derived from the directory.
    name: String,
    /// Package names parsed from the package file's import list.
    imports: Vec<String>,
    /// Paths of the indexed MoonBit test files assigned to this package.
    tests: Vec<String>,
}
1114
1115impl MoonBitPackageGraph {
1116 fn from_root(root: &Path, indexed_files: &[FileRecord]) -> Self {
1117 let module_name = moonbit_module_name(root, indexed_files);
1118 let mut package_by_dir = BTreeMap::new();
1119
1120 for record in indexed_files {
1121 if !is_moonbit_package_file(&record.path) {
1122 continue;
1123 }
1124 let dir = parent_dir(&record.path);
1125 let source = fs::read_to_string(root.join(&record.path)).unwrap_or_default();
1126 let (name, imports) = parse_moonbit_package_metadata(&source);
1127 let package_name =
1128 name.unwrap_or_else(|| moonbit_package_name_from_dir(module_name.as_deref(), &dir));
1129 package_by_dir.insert(
1130 dir.clone(),
1131 MoonBitPackage {
1132 name: package_name,
1133 imports,
1134 tests: Vec::new(),
1135 },
1136 );
1137 }
1138
1139 let package_dirs: Vec<String> = package_by_dir.keys().cloned().collect();
1140 for record in indexed_files {
1141 if record.language != Language::MoonBit || !is_test_file(&record.path) {
1142 continue;
1143 }
1144 if let Some(package_dir) = moonbit_package_dir_from_dirs(&record.path, &package_dirs) {
1145 if let Some(package) = package_by_dir.get_mut(&package_dir) {
1146 package.tests.push(record.path.clone());
1147 }
1148 }
1149 }
1150
1151 let local_names: BTreeSet<String> = package_by_dir
1152 .values()
1153 .map(|package| package.name.clone())
1154 .collect();
1155 let mut reverse_imports: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
1156 for package in package_by_dir.values() {
1157 for import in &package.imports {
1158 if local_names.contains(import) {
1159 reverse_imports
1160 .entry(import.clone())
1161 .or_default()
1162 .insert(package.name.clone());
1163 }
1164 }
1165 }
1166
1167 Self {
1168 package_by_dir,
1169 reverse_imports,
1170 }
1171 }
1172
1173 fn dependent_package_tests(&self, file: &str) -> Vec<String> {
1174 if is_test_file(file) || !is_moonbit_source_file(file) {
1175 return Vec::new();
1176 }
1177 let Some(changed_package) = self.package_for_file(file) else {
1178 return Vec::new();
1179 };
1180
1181 let mut pending: Vec<String> = self
1182 .reverse_imports
1183 .get(&changed_package.name)
1184 .map(|deps| deps.iter().cloned().collect())
1185 .unwrap_or_default();
1186 let mut dependent_names = BTreeSet::new();
1187 while let Some(package_name) = pending.pop() {
1188 if !dependent_names.insert(package_name.clone()) {
1189 continue;
1190 }
1191 if let Some(next) = self.reverse_imports.get(&package_name) {
1192 pending.extend(next.iter().cloned());
1193 }
1194 }
1195
1196 self.package_by_dir
1197 .values()
1198 .filter(|package| dependent_names.contains(&package.name))
1199 .flat_map(|package| package.tests.clone())
1200 .collect()
1201 }
1202
1203 fn package_for_file(&self, file: &str) -> Option<&MoonBitPackage> {
1204 let package_dir = moonbit_package_dir_from_dirs(file, self.package_by_dir.keys())?;
1205 self.package_by_dir.get(&package_dir)
1206 }
1207}
1208
1209fn moonbit_module_name(root: &Path, indexed_files: &[FileRecord]) -> Option<String> {
1210 indexed_files
1211 .iter()
1212 .filter(|record| record.path.ends_with("moon.mod.json"))
1213 .min_by_key(|record| record.path.matches('/').count())
1214 .and_then(|record| fs::read_to_string(root.join(&record.path)).ok())
1215 .and_then(|source| {
1216 serde_json::from_str::<serde_json::Value>(&source)
1217 .ok()
1218 .and_then(|json| {
1219 json.get("name")
1220 .and_then(|value| value.as_str())
1221 .map(str::to_string)
1222 })
1223 })
1224}
1225
1226fn parse_moonbit_package_metadata(source: &str) -> (Option<String>, Vec<String>) {
1227 let Ok(json) = serde_json::from_str::<serde_json::Value>(source) else {
1228 return (None, Vec::new());
1229 };
1230 let name = json
1231 .get("name")
1232 .and_then(|value| value.as_str())
1233 .map(str::to_string);
1234 let mut imports = Vec::new();
1235 if let Some(value) = json.get("import").or_else(|| json.get("imports")) {
1236 collect_moonbit_imports(value, &mut imports);
1237 }
1238 (name, imports)
1239}
1240
1241fn collect_moonbit_imports(value: &serde_json::Value, imports: &mut Vec<String>) {
1242 match value {
1243 serde_json::Value::String(import) => imports.push(import.clone()),
1244 serde_json::Value::Array(values) => {
1245 for value in values {
1246 collect_moonbit_imports(value, imports);
1247 }
1248 }
1249 serde_json::Value::Object(values) => {
1250 for (alias, value) in values {
1251 imports.push(value.as_str().unwrap_or(alias).to_string());
1252 }
1253 }
1254 _ => {}
1255 }
1256}
1257
/// Derives a package name for a manifest that does not declare one.
///
/// With a module name the result is `module` for the root directory and
/// `module/dir` otherwise. Without one it is `dir`, or the placeholder
/// `moonbit-package` for the root directory.
fn moonbit_package_name_from_dir(module_name: Option<&str>, dir: &str) -> String {
    let at_root = dir.is_empty();
    let Some(module) = module_name else {
        return if at_root {
            "moonbit-package".to_string()
        } else {
            dir.to_string()
        };
    };
    if at_root {
        module.to_string()
    } else {
        format!("{module}/{dir}")
    }
}
1266
/// Reports whether `file` ends with one of the MoonBit source suffixes
/// (`.mbt`, `.mbti` or `.mbt.md`).
fn is_moonbit_source_file(file: &str) -> bool {
    const SOURCE_SUFFIXES: [&str; 3] = [".mbt", ".mbti", ".mbt.md"];
    SOURCE_SUFFIXES
        .iter()
        .any(|suffix| file.ends_with(*suffix))
}
1270
/// Reports whether `file` is a MoonBit package manifest, i.e. its final path
/// component is exactly `moon.pkg.json` or `moon.pkg`.
///
/// The basename is compared as a whole: a suffix test on the full path would
/// also accept unrelated files such as `harvestmoon.pkg.json` and register
/// their directory as a package.
fn is_moonbit_package_file(file: &str) -> bool {
    // `rsplit` always yields at least one item, so the fallback is only a
    // formality for a path without any `/`.
    let basename = file.rsplit('/').next().unwrap_or(file);
    matches!(basename, "moon.pkg.json" | "moon.pkg")
}
1274
1275fn moonbit_package_dir(file: &str, indexed_files: &[FileRecord]) -> Option<String> {
1276 let dirs: Vec<String> = indexed_files
1277 .iter()
1278 .filter(|record| is_moonbit_package_file(&record.path))
1279 .map(|record| parent_dir(&record.path))
1280 .collect();
1281 moonbit_package_dir_from_dirs(file, &dirs)
1282}
1283
/// Picks, among `dirs`, the longest directory that encloses `file`.
///
/// A directory encloses `file` when it is empty (the root), equals `file`, or
/// is followed by a `/` at the start of `file`. Returns `None` when nothing
/// matches.
fn moonbit_package_dir_from_dirs<'a, I>(file: &str, dirs: I) -> Option<String>
where
    I: IntoIterator<Item = &'a String>,
{
    let mut deepest: Option<&'a str> = None;
    for candidate in dirs {
        let candidate = candidate.as_str();
        // After removing the directory prefix, the remainder must be empty or
        // start at a path separator; otherwise `src/li` would claim files
        // under `src/lib`.
        let encloses = candidate.is_empty()
            || file
                .strip_prefix(candidate)
                .map_or(false, |rest| rest.is_empty() || rest.starts_with('/'));
        if !encloses {
            continue;
        }
        match deepest {
            Some(current) if candidate.len() <= current.len() => {}
            _ => deepest = Some(candidate),
        }
    }
    deepest.map(str::to_string)
}
1300
/// Returns everything before the last `/` of `file`, or an empty string when
/// `file` contains no `/`.
fn parent_dir(file: &str) -> String {
    match file.rfind('/') {
        Some(slash) => file[..slash].to_string(),
        None => String::new(),
    }
}
1306
1307fn rust_name_heuristic_tests(file: &str, indexed_files: &[FileRecord]) -> Vec<String> {
1308 let Some(changed) = indexed_files.iter().find(|record| record.path == file) else {
1309 return Vec::new();
1310 };
1311 if changed.language != Language::Rust || is_test_file(file) {
1312 return Vec::new();
1313 }
1314 let Some(stem) = file
1315 .rsplit('/')
1316 .next()
1317 .and_then(|name| name.strip_suffix(".rs"))
1318 else {
1319 return Vec::new();
1320 };
1321 if stem.len() < 3 {
1322 return Vec::new();
1323 }
1324 indexed_files
1325 .iter()
1326 .filter(|record| record.language == Language::Rust)
1327 .filter(|record| is_test_file(&record.path))
1328 .filter(|record| rust_test_path_matches_stem(&record.path, stem))
1329 .map(|record| record.path.clone())
1330 .collect()
1331}
1332
/// Reports whether the `.rs` file name of `test_path` contains `stem` as a
/// whole underscore-delimited piece.
///
/// Matches a name equal to `stem`, starting with `stem_`, ending with `_stem`,
/// or containing `_stem_`. Paths that do not end in `.rs` never match.
fn rust_test_path_matches_stem(test_path: &str, stem: &str) -> bool {
    let basename = match test_path.rfind('/') {
        Some(slash) => &test_path[slash + 1..],
        None => test_path,
    };
    let Some(name) = basename.strip_suffix(".rs") else {
        return false;
    };
    // Padding the name with underscores folds the equal / prefix / suffix /
    // infix cases into a single containment test.
    format!("_{name}_").contains(&format!("_{stem}_"))
}
1347
1348fn rust_workspace_heuristic_tests(
1349 root: &Path,
1350 file: &str,
1351 indexed_files: &[FileRecord],
1352) -> Vec<String> {
1353 let Some(changed) = indexed_files.iter().find(|record| record.path == file) else {
1354 return Vec::new();
1355 };
1356 if changed.language != Language::Rust || is_test_file(file) {
1357 return Vec::new();
1358 }
1359 let Some(crate_root) = rust_crate_root(file) else {
1360 return Vec::new();
1361 };
1362 indexed_files
1363 .iter()
1364 .filter(|record| record.language == Language::Rust)
1365 .filter(|record| record.path != file)
1366 .filter(|record| rust_crate_root(&record.path).as_deref() == Some(crate_root.as_str()))
1367 .filter(|record| {
1368 is_test_file(&record.path) || rust_file_contains_inline_tests(root, &record.path)
1369 })
1370 .map(|record| record.path.clone())
1371 .collect()
1372}
1373
/// Guesses the crate root directory of a `/`-separated file path.
///
/// A path whose first component is `crates` and that has at least one more
/// component maps to `crates/<second component>`. Otherwise the result is
/// everything before the first `src` component, which is the empty string for
/// `src/...`. Returns `None` when neither rule applies.
fn rust_crate_root(file: &str) -> Option<String> {
    let segments: Vec<&str> = file.split('/').collect();
    if let ["crates", member, ..] = segments.as_slice() {
        return Some(format!("crates/{member}"));
    }
    let src_index = segments.iter().position(|segment| *segment == "src")?;
    Some(segments[..src_index].join("/"))
}
1384
1385fn rust_file_contains_inline_tests(root: &Path, file: &str) -> bool {
1386 fs::read_to_string(root.join(file))
1387 .map(|text| text.contains("#[cfg(test)]") || text.contains("#[test]"))
1388 .unwrap_or(false)
1389}