1use crate::config::CodeGraphConfig;
2use crate::types::*;
3use regex::Regex;
4use std::path::Path;
5use tree_sitter::{Node as SyntaxNode, Parser};
6
7pub fn should_include_file(path: &Path, config: &CodeGraphConfig) -> bool {
8 let s = path.to_string_lossy().replace('\\', "/");
9 if s.starts_with(".codegraph/") {
10 return false;
11 }
12 if config.exclude.iter().any(|p| glob_match(p, &s)) {
13 return false;
14 }
15 config.include.iter().any(|p| glob_match(p, &s))
16}
17
/// Minimal glob matcher for the pattern shapes used by the include/exclude lists.
///
/// Supported forms, tried in this order:
/// * `**/*.ext`  - any path ending in `.ext`;
/// * `**/dir/**` - `dir` itself, or any path that contains `dir` as a whole
///   path component (or run of components) followed by `/`;
/// * `**/suffix` - any path ending in `suffix`;
/// * anything else must equal the path exactly.
///
/// `path` is expected to use `/` as its separator.
fn glob_match(pattern: &str, path: &str) -> bool {
    if let Some(ext) = pattern.strip_prefix("**/*.") {
        return path
            .strip_suffix(ext)
            .map_or(false, |stem| stem.ends_with('.'));
    }
    if let Some(dir) = pattern
        .strip_prefix("**/")
        .and_then(|rest| rest.strip_suffix("/**"))
    {
        if path == dir {
            return true;
        }
        // The directory must start at a segment boundary; a plain substring test
        // would let `**/node_modules/**` match `my_node_modules/x`.
        let at_root = path
            .strip_prefix(dir)
            .map_or(false, |rest| rest.starts_with('/'));
        return at_root || path.contains(&format!("/{dir}/"));
    }
    if let Some(suffix) = pattern.strip_prefix("**/") {
        return path.ends_with(suffix);
    }
    pattern == path
}
34
35pub fn detect_language(path: &Path, _source: &str) -> Language {
36 let name = path
37 .file_name()
38 .and_then(|s| s.to_str())
39 .unwrap_or_default()
40 .to_lowercase();
41 if name == "moon.mod.json" || name == "moon.pkg.json" || name == "moon.pkg" {
42 return Language::MoonBit;
43 }
44 if name.ends_with(".mbt.md") {
45 return Language::MoonBit;
46 }
47 match path
48 .extension()
49 .and_then(|s| s.to_str())
50 .unwrap_or_default()
51 .to_lowercase()
52 .as_str()
53 {
54 "ts" => Language::TypeScript,
55 "tsx" => Language::Tsx,
56 "js" | "mjs" | "cjs" => Language::JavaScript,
57 "jsx" => Language::Jsx,
58 "py" | "pyw" => Language::Python,
59 "go" => Language::Go,
60 "rs" => Language::Rust,
61 "java" => Language::Java,
62 "c" | "h" => Language::C,
63 "cpp" | "cc" | "cxx" | "hpp" | "hxx" => Language::Cpp,
64 "cs" => Language::CSharp,
65 "php" => Language::Php,
66 "rb" | "rake" => Language::Ruby,
67 "swift" => Language::Swift,
68 "kt" | "kts" => Language::Kotlin,
69 "dart" => Language::Dart,
70 "svelte" => Language::Svelte,
71 "vue" => Language::Vue,
72 "liquid" => Language::Liquid,
73 "pas" | "dpr" | "dpk" | "lpr" | "dfm" | "fmx" => Language::Pascal,
74 "scala" | "sc" => Language::Scala,
75 "mbt" | "mbti" => Language::MoonBit,
76 _ => Language::Unknown,
77 }
78}
79
80pub fn extract_from_source(path: &Path, source: &str, language: Language) -> ExtractionResult {
81 let file_path = path.to_string_lossy().replace('\\', "/");
82 let now = now_ms();
83 let mut nodes = vec![Node {
84 id: format!("file:{}", file_path),
85 kind: NodeKind::File,
86 name: path
87 .file_name()
88 .and_then(|s| s.to_str())
89 .unwrap_or(&file_path)
90 .to_string(),
91 qualified_name: file_path.clone(),
92 file_path: file_path.clone(),
93 language,
94 start_line: 1,
95 end_line: source.lines().count().max(1) as i64,
96 start_column: 0,
97 end_column: 0,
98 docstring: None,
99 signature: None,
100 visibility: None,
101 is_exported: false,
102 is_async: false,
103 is_static: false,
104 is_abstract: false,
105 updated_at: now,
106 }];
107 let mut edges = Vec::new();
108 let mut refs = Vec::new();
109
110 match language {
111 Language::Rust => extract_rust(&file_path, source, now, &mut nodes, &mut edges, &mut refs),
112 Language::MoonBit => {
113 extract_moonbit(&file_path, source, now, &mut nodes, &mut edges, &mut refs)
114 }
115 _ => extract_generic(
116 &file_path, source, language, now, &mut nodes, &mut edges, &mut refs,
117 ),
118 }
119
120 ExtractionResult {
121 nodes,
122 edges,
123 unresolved_references: refs,
124 }
125}
126
127fn extract_rust(
128 file_path: &str,
129 source: &str,
130 now: i64,
131 nodes: &mut Vec<Node>,
132 edges: &mut Vec<Edge>,
133 refs: &mut Vec<UnresolvedReference>,
134) {
135 if try_extract_rust_tree_sitter(file_path, source, now, nodes, edges, refs) {
136 return;
137 }
138
139 add_regex_nodes(
140 file_path,
141 source,
142 Language::Rust,
143 now,
144 nodes,
145 edges,
146 r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\s*([^{;]*)",
147 NodeKind::Function,
148 );
149 add_regex_nodes(
150 file_path,
151 source,
152 Language::Rust,
153 now,
154 nodes,
155 edges,
156 r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?struct\s+([A-Za-z_][A-Za-z0-9_]*)",
157 NodeKind::Struct,
158 );
159 add_regex_nodes(
160 file_path,
161 source,
162 Language::Rust,
163 now,
164 nodes,
165 edges,
166 r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?trait\s+([A-Za-z_][A-Za-z0-9_]*)",
167 NodeKind::Trait,
168 );
169 add_regex_nodes(
170 file_path,
171 source,
172 Language::Rust,
173 now,
174 nodes,
175 edges,
176 r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?enum\s+([A-Za-z_][A-Za-z0-9_]*)",
177 NodeKind::Enum,
178 );
179 add_regex_nodes(
180 file_path,
181 source,
182 Language::Rust,
183 now,
184 nodes,
185 edges,
186 r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?type\s+([A-Za-z_][A-Za-z0-9_]*)",
187 NodeKind::TypeAlias,
188 );
189
190 let use_re = Regex::new(r"(?m)^\s*use\s+([^;]+);").unwrap();
191 for cap in use_re.captures_iter(source) {
192 let full = cap.get(1).unwrap();
193 let root = full
194 .as_str()
195 .split("::")
196 .next()
197 .unwrap_or(full.as_str())
198 .trim_matches('{')
199 .trim();
200 let node = make_node(
201 file_path,
202 Language::Rust,
203 NodeKind::Import,
204 root,
205 line_for(source, full.start()),
206 0,
207 now,
208 Some(format!("use {};", full.as_str())),
209 );
210 add_contains(nodes, edges, &node);
211 refs.push(unresolved(
212 &nodes[0].id,
213 root,
214 EdgeKind::Imports,
215 file_path,
216 Language::Rust,
217 node.start_line,
218 ));
219 nodes.push(node);
220 }
221
222 let impl_re = Regex::new(
223 r"(?m)^\s*impl(?:<[^>]+>)?\s+([A-Za-z_][A-Za-z0-9_:]*)\s+for\s+([A-Za-z_][A-Za-z0-9_]*)",
224 )
225 .unwrap();
226 for cap in impl_re.captures_iter(source) {
227 let trait_name = cap.get(1).unwrap().as_str().rsplit("::").next().unwrap();
228 let type_name = cap.get(2).unwrap().as_str();
229 if let Some(src) = nodes
230 .iter()
231 .find(|n| n.name == type_name && matches!(n.kind, NodeKind::Struct | NodeKind::Enum))
232 .map(|n| n.id.clone())
233 {
234 refs.push(unresolved(
235 &src,
236 trait_name,
237 EdgeKind::Implements,
238 file_path,
239 Language::Rust,
240 line_for(source, cap.get(1).unwrap().start()),
241 ));
242 }
243 }
244 add_call_refs(
245 file_path,
246 source,
247 Language::Rust,
248 nodes,
249 refs,
250 r"([A-Za-z_][A-Za-z0-9_:]*)\s*\(",
251 );
252}
253
254fn extract_moonbit(
255 file_path: &str,
256 source: &str,
257 now: i64,
258 nodes: &mut Vec<Node>,
259 edges: &mut Vec<Edge>,
260 refs: &mut Vec<UnresolvedReference>,
261) {
262 if file_path.ends_with("moon.mod.json")
263 || file_path.ends_with("moon.pkg.json")
264 || file_path.ends_with("moon.pkg")
265 {
266 extract_moonbit_metadata(file_path, source, now, nodes, edges, refs);
267 return;
268 }
269
270 let source = if file_path.ends_with(".mbt.md") {
271 extract_mbt_markdown_code_with_padding(source)
272 } else {
273 source.to_string()
274 };
275
276 if try_extract_moonbit_tree_sitter(file_path, &source, now, nodes, edges, refs) {
277 return;
278 }
279
280 add_regex_nodes(
281 file_path,
282 &source,
283 Language::MoonBit,
284 now,
285 nodes,
286 edges,
287 r"(?m)^\s*(pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\s*([^{]*)",
288 NodeKind::Function,
289 );
290 add_regex_nodes(
291 file_path,
292 &source,
293 Language::MoonBit,
294 now,
295 nodes,
296 edges,
297 r"(?m)^\s*(pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*::[A-Za-z_][A-Za-z0-9_]*)\s*([^{]*)",
298 NodeKind::Method,
299 );
300 add_regex_nodes(
301 file_path,
302 &source,
303 Language::MoonBit,
304 now,
305 nodes,
306 edges,
307 r"(?m)^\s*(pub\s+)?struct\s+([A-Za-z_][A-Za-z0-9_]*)",
308 NodeKind::Struct,
309 );
310 add_regex_nodes(
311 file_path,
312 &source,
313 Language::MoonBit,
314 now,
315 nodes,
316 edges,
317 r"(?m)^\s*(pub\s+)?trait\s+([A-Za-z_][A-Za-z0-9_]*)",
318 NodeKind::Trait,
319 );
320 add_regex_nodes(
321 file_path,
322 &source,
323 Language::MoonBit,
324 now,
325 nodes,
326 edges,
327 r"(?m)^\s*(pub\s+)?enum\s+([A-Za-z_][A-Za-z0-9_]*)",
328 NodeKind::Enum,
329 );
330 add_regex_nodes(
331 file_path,
332 &source,
333 Language::MoonBit,
334 now,
335 nodes,
336 edges,
337 r"(?m)^\s*(pub\s+)?type\s+([A-Za-z_][A-Za-z0-9_]*)",
338 NodeKind::TypeAlias,
339 );
340 add_regex_nodes(
341 file_path,
342 &source,
343 Language::MoonBit,
344 now,
345 nodes,
346 edges,
347 r"(?m)^\s*(pub\s+)?let\s+([A-Za-z_][A-Za-z0-9_]*)",
348 NodeKind::Variable,
349 );
350
351 let import_re =
352 Regex::new(r#"(?m)^\s*import\s+([@\w/.\-]+)(?:\s+as\s+([A-Za-z_][A-Za-z0-9_]*))?"#)
353 .unwrap();
354 for cap in import_re.captures_iter(&source) {
355 let package = cap.get(1).unwrap().as_str();
356 let name = cap.get(2).map(|m| m.as_str()).unwrap_or(package);
357 let node = make_node(
358 file_path,
359 Language::MoonBit,
360 NodeKind::Import,
361 name,
362 line_for(&source, cap.get(0).unwrap().start()),
363 0,
364 now,
365 Some(cap.get(0).unwrap().as_str().to_string()),
366 );
367 add_contains(nodes, edges, &node);
368 refs.push(unresolved(
369 &nodes[0].id,
370 name,
371 EdgeKind::Imports,
372 file_path,
373 Language::MoonBit,
374 node.start_line,
375 ));
376 nodes.push(node);
377 }
378 add_call_refs(
379 file_path,
380 &source,
381 Language::MoonBit,
382 nodes,
383 refs,
384 r"([@A-Za-z_][@A-Za-z0-9_:/]*)\s*\(",
385 );
386}
387
388fn extract_moonbit_metadata(
389 file_path: &str,
390 source: &str,
391 now: i64,
392 nodes: &mut Vec<Node>,
393 edges: &mut Vec<Edge>,
394 refs: &mut Vec<UnresolvedReference>,
395) {
396 let Ok(json) = serde_json::from_str::<serde_json::Value>(source) else {
397 return;
398 };
399 if file_path.ends_with("moon.mod.json") {
400 if let Some(name) = json.get("name").and_then(|v| v.as_str()) {
401 let node = make_node(
402 file_path,
403 Language::MoonBit,
404 NodeKind::Module,
405 name,
406 1,
407 0,
408 now,
409 Some("moon.mod.json".into()),
410 );
411 add_contains(nodes, edges, &node);
412 nodes.push(node);
413 }
414 return;
415 }
416
417 let package_name = json
418 .get("name")
419 .and_then(|v| v.as_str())
420 .or_else(|| file_path.rsplit('/').nth(1))
421 .unwrap_or("moonbit-package");
422 let node = make_node(
423 file_path,
424 Language::MoonBit,
425 NodeKind::Module,
426 package_name,
427 1,
428 0,
429 now,
430 Some(file_path.rsplit('/').next().unwrap_or("moon.pkg").into()),
431 );
432 add_contains(nodes, edges, &node);
433 let package_node_id = node.id.clone();
434 nodes.push(node);
435
436 if let Some(imports) = json.get("import").or_else(|| json.get("imports")) {
437 if let Some(obj) = imports.as_object() {
438 for (alias, value) in obj {
439 let target = value.as_str().unwrap_or(alias);
440 let import_node = make_node(
441 file_path,
442 Language::MoonBit,
443 NodeKind::Import,
444 alias,
445 1,
446 0,
447 now,
448 Some(target.to_string()),
449 );
450 add_contains(nodes, edges, &import_node);
451 refs.push(unresolved(
452 &package_node_id,
453 alias,
454 EdgeKind::Imports,
455 file_path,
456 Language::MoonBit,
457 1,
458 ));
459 nodes.push(import_node);
460 }
461 }
462 }
463}
464
465fn try_extract_rust_tree_sitter(
466 file_path: &str,
467 source: &str,
468 now: i64,
469 nodes: &mut Vec<Node>,
470 edges: &mut Vec<Edge>,
471 refs: &mut Vec<UnresolvedReference>,
472) -> bool {
473 let mut parser = Parser::new();
474 if parser
475 .set_language(&tree_sitter_rust::LANGUAGE.into())
476 .is_err()
477 {
478 return false;
479 }
480 let Some(tree) = parser.parse(source, None) else {
481 return false;
482 };
483 if tree.root_node().has_error() {
484 return false;
485 }
486
487 let root = tree.root_node();
488 let mut stack = Vec::new();
489 collect_rust_nodes(file_path, source, root, now, nodes, edges, refs, &mut stack);
490 collect_rust_refs(file_path, source, root, nodes, refs);
491 true
492}
493
494fn collect_rust_nodes(
495 file_path: &str,
496 source: &str,
497 node: SyntaxNode,
498 now: i64,
499 nodes: &mut Vec<Node>,
500 edges: &mut Vec<Edge>,
501 refs: &mut Vec<UnresolvedReference>,
502 stack: &mut Vec<String>,
503) {
504 let kind = match node.kind() {
505 "function_item" => {
506 if rust_receiver_type(node, source).is_some() {
507 Some(NodeKind::Method)
508 } else {
509 Some(NodeKind::Function)
510 }
511 }
512 "struct_item" => Some(NodeKind::Struct),
513 "trait_item" => Some(NodeKind::Trait),
514 "enum_item" => Some(NodeKind::Enum),
515 "enum_variant" => Some(NodeKind::EnumMember),
516 "type_item" => Some(NodeKind::TypeAlias),
517 "const_item" => Some(NodeKind::Constant),
518 "static_item" => Some(NodeKind::Variable),
519 "let_declaration" => Some(NodeKind::Variable),
520 "field_declaration" => Some(NodeKind::Field),
521 "function_signature_item" => Some(NodeKind::Method),
522 "use_declaration" => Some(NodeKind::Import),
523 "mod_item" => Some(NodeKind::Module),
524 _ => None,
525 };
526
527 let mut pushed = false;
528 if let Some(kind) = kind {
529 if let Some(name) = rust_node_name(node, source, kind) {
530 let signature = Some(
531 node_text(node, source)
532 .lines()
533 .next()
534 .unwrap_or("")
535 .trim()
536 .to_string(),
537 );
538 let mut out =
539 make_node_span(file_path, Language::Rust, kind, &name, node, now, signature);
540 out.is_exported = rust_is_public(node, source);
541 out.visibility = if out.is_exported {
542 Some("public".into())
543 } else if matches!(
544 kind,
545 NodeKind::Function
546 | NodeKind::Method
547 | NodeKind::Struct
548 | NodeKind::Trait
549 | NodeKind::Enum
550 | NodeKind::TypeAlias
551 ) {
552 Some("private".into())
553 } else {
554 None
555 };
556 out.is_async = node_text(node, source).trim_start().starts_with("async ")
557 || node_text(node, source).contains(" async fn ");
558 if kind == NodeKind::Method {
559 if let Some(owner) = rust_receiver_type(node, source) {
560 out.qualified_name = format!("{owner}::{name}");
561 }
562 }
563 add_contains_from_stack(nodes, edges, stack, &out, "tree-sitter");
564 let id = out.id.clone();
565 nodes.push(out);
566 if matches!(
567 kind,
568 NodeKind::Struct
569 | NodeKind::Trait
570 | NodeKind::Enum
571 | NodeKind::Module
572 | NodeKind::Function
573 | NodeKind::Method
574 ) {
575 stack.push(id);
576 pushed = true;
577 }
578 }
579 }
580
581 if node.kind() == "impl_item" {
582 if let Some((trait_name, type_name)) = rust_impl_trait_for_type(node, source) {
583 if let Some(type_node) = nodes.iter().find(|n| {
584 n.name == type_name
585 && matches!(n.kind, NodeKind::Struct | NodeKind::Enum | NodeKind::Trait)
586 }) {
587 refs_push(
588 refs,
589 &type_node.id,
590 &trait_name,
591 EdgeKind::Implements,
592 file_path,
593 Language::Rust,
594 node.start_position().row as i64 + 1,
595 node.start_position().column as i64,
596 );
597 }
598 }
599 }
600
601 for child in named_children(node) {
602 collect_rust_nodes(file_path, source, child, now, nodes, edges, refs, stack);
603 }
604
605 if pushed {
606 stack.pop();
607 }
608}
609
610fn collect_rust_refs(
611 file_path: &str,
612 source: &str,
613 node: SyntaxNode,
614 nodes: &[Node],
615 refs: &mut Vec<UnresolvedReference>,
616) {
617 match node.kind() {
618 "use_declaration" => {
619 if let Some(name) = rust_import_root(node, source) {
620 refs_push(
621 refs,
622 &format!("file:{file_path}"),
623 &name,
624 EdgeKind::Imports,
625 file_path,
626 Language::Rust,
627 node.start_position().row as i64 + 1,
628 node.start_position().column as i64,
629 );
630 }
631 }
632 "call_expression" => {
633 if let Some(function) = node.child_by_field_name("function") {
634 if let Some(name) = callable_name(function, source) {
635 if let Some(caller) =
636 enclosing_callable(nodes, node.start_position().row as i64 + 1)
637 {
638 refs_push(
639 refs,
640 &caller.id,
641 &name,
642 EdgeKind::Calls,
643 file_path,
644 Language::Rust,
645 node.start_position().row as i64 + 1,
646 node.start_position().column as i64,
647 );
648 }
649 }
650 }
651 }
652 _ => {}
653 }
654
655 for child in named_children(node) {
656 collect_rust_refs(file_path, source, child, nodes, refs);
657 }
658}
659
660fn try_extract_moonbit_tree_sitter(
661 file_path: &str,
662 source: &str,
663 now: i64,
664 nodes: &mut Vec<Node>,
665 edges: &mut Vec<Edge>,
666 refs: &mut Vec<UnresolvedReference>,
667) -> bool {
668 let mut parser = Parser::new();
669 if parser
670 .set_language(&tree_sitter_moonbit::LANGUAGE.into())
671 .is_err()
672 {
673 return false;
674 }
675 let Some(tree) = parser.parse(source, None) else {
676 return false;
677 };
678 if tree.root_node().has_error() {
679 return false;
680 }
681
682 let root = tree.root_node();
683 let mut stack = Vec::new();
684 collect_moonbit_nodes(file_path, source, root, now, nodes, edges, &mut stack);
685 collect_moonbit_refs(file_path, source, root, nodes, refs);
686 true
687}
688
689fn collect_moonbit_nodes(
690 file_path: &str,
691 source: &str,
692 node: SyntaxNode,
693 now: i64,
694 nodes: &mut Vec<Node>,
695 edges: &mut Vec<Edge>,
696 stack: &mut Vec<String>,
697) {
698 let kind = match node.kind() {
699 "function_definition" => Some(NodeKind::Function),
700 "impl_definition" => Some(NodeKind::Method),
701 "struct_definition" | "tuple_struct_definition" => Some(NodeKind::Struct),
702 "trait_definition" => Some(NodeKind::Trait),
703 "trait_method_declaration" => Some(NodeKind::Method),
704 "enum_definition" => Some(NodeKind::Enum),
705 "enum_constructor" => Some(NodeKind::EnumMember),
706 "type_alias_definition" | "type_definition" => Some(NodeKind::TypeAlias),
707 "const_definition" => Some(NodeKind::Constant),
708 "import_declaration" => Some(NodeKind::Import),
709 "package_declaration" => Some(NodeKind::Module),
710 _ => None,
711 };
712
713 let mut pushed = false;
714 if let Some(kind) = kind {
715 if let Some(name) = moonbit_node_name(node, source, kind) {
716 let signature = Some(
717 node_text(node, source)
718 .lines()
719 .next()
720 .unwrap_or("")
721 .trim()
722 .to_string(),
723 );
724 let mut out = make_node_span(
725 file_path,
726 Language::MoonBit,
727 kind,
728 &name,
729 node,
730 now,
731 signature,
732 );
733 out.is_exported = moonbit_is_public(node, source);
734 out.visibility = if out.is_exported {
735 Some("public".into())
736 } else {
737 None
738 };
739 if kind == NodeKind::Method {
740 if let Some(owner) = moonbit_impl_owner(node, source) {
741 out.qualified_name = format!("{owner}::{name}");
742 }
743 }
744 add_contains_from_stack(nodes, edges, stack, &out, "tree-sitter");
745 let id = out.id.clone();
746 nodes.push(out);
747 if matches!(
748 kind,
749 NodeKind::Struct
750 | NodeKind::Trait
751 | NodeKind::Enum
752 | NodeKind::Module
753 | NodeKind::Function
754 | NodeKind::Method
755 ) {
756 stack.push(id);
757 pushed = true;
758 }
759 }
760 }
761
762 for child in named_children(node) {
763 collect_moonbit_nodes(file_path, source, child, now, nodes, edges, stack);
764 }
765
766 if pushed {
767 stack.pop();
768 }
769}
770
771fn collect_moonbit_refs(
772 file_path: &str,
773 source: &str,
774 node: SyntaxNode,
775 nodes: &[Node],
776 refs: &mut Vec<UnresolvedReference>,
777) {
778 match node.kind() {
779 "import_declaration" => {
780 for child in named_children(node) {
781 if child.kind() == "import_item" {
782 if let Some(name) = moonbit_import_name(child, source) {
783 refs_push(
784 refs,
785 &format!("file:{file_path}"),
786 &name,
787 EdgeKind::Imports,
788 file_path,
789 Language::MoonBit,
790 child.start_position().row as i64 + 1,
791 child.start_position().column as i64,
792 );
793 }
794 }
795 }
796 }
797 "apply_expression" | "dot_apply_expression" | "dot_dot_apply_expression" => {
798 if let Some(name) = moonbit_call_name(node, source) {
799 if let Some(caller) =
800 enclosing_callable(nodes, node.start_position().row as i64 + 1)
801 {
802 refs_push(
803 refs,
804 &caller.id,
805 &name,
806 EdgeKind::Calls,
807 file_path,
808 Language::MoonBit,
809 node.start_position().row as i64 + 1,
810 node.start_position().column as i64,
811 );
812 }
813 }
814 }
815 _ => {}
816 }
817
818 for child in named_children(node) {
819 collect_moonbit_refs(file_path, source, child, nodes, refs);
820 }
821}
822
823fn extract_generic(
824 file_path: &str,
825 source: &str,
826 language: Language,
827 now: i64,
828 nodes: &mut Vec<Node>,
829 edges: &mut Vec<Edge>,
830 refs: &mut Vec<UnresolvedReference>,
831) {
832 add_regex_nodes(
833 file_path,
834 source,
835 language,
836 now,
837 nodes,
838 edges,
839 r"(?m)^\s*(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_$][A-Za-z0-9_$]*)",
840 NodeKind::Function,
841 );
842 add_regex_nodes(
843 file_path,
844 source,
845 language,
846 now,
847 nodes,
848 edges,
849 r"(?m)^\s*(?:export\s+)?class\s+([A-Za-z_$][A-Za-z0-9_$]*)",
850 NodeKind::Class,
851 );
852 add_call_refs(
853 file_path,
854 source,
855 language,
856 nodes,
857 refs,
858 r"([A-Za-z_$][A-Za-z0-9_$.]*)\s*\(",
859 );
860}
861
862fn add_regex_nodes(
863 file_path: &str,
864 source: &str,
865 language: Language,
866 now: i64,
867 nodes: &mut Vec<Node>,
868 edges: &mut Vec<Edge>,
869 pattern: &str,
870 kind: NodeKind,
871) {
872 let re = Regex::new(pattern).unwrap();
873 for cap in re.captures_iter(source) {
874 let Some(name_match) = cap.get(2).or_else(|| cap.get(1)) else {
875 continue;
876 };
877 let mut name = name_match.as_str().to_string();
878 if kind == NodeKind::Method && name.contains("::") {
879 name = name.rsplit("::").next().unwrap_or(&name).to_string();
880 }
881 let signature = cap.get(0).map(|m| m.as_str().trim().to_string());
882 let line = line_for(source, name_match.start());
883 let mut node = make_node(file_path, language, kind, &name, line, 0, now, signature);
884 node.is_exported = cap
885 .get(1)
886 .map(|m| m.as_str().contains("pub") || m.as_str().contains("export"))
887 .unwrap_or(false);
888 node.visibility = if node.is_exported {
889 Some("public".into())
890 } else {
891 None
892 };
893 add_contains(nodes, edges, &node);
894 nodes.push(node);
895 }
896}
897
898fn add_call_refs(
899 file_path: &str,
900 source: &str,
901 language: Language,
902 nodes: &[Node],
903 refs: &mut Vec<UnresolvedReference>,
904 pattern: &str,
905) {
906 let re = Regex::new(pattern).unwrap();
907 let keywords = [
908 "if", "for", "while", "match", "return", "fn", "test", "inspect", "Some", "Ok", "Err",
909 ];
910 for cap in re.captures_iter(source) {
911 let name = cap.get(1).unwrap().as_str().rsplit("::").next().unwrap();
912 if keywords.contains(&name) {
913 continue;
914 }
915 let line = line_for(source, cap.get(1).unwrap().start());
916 if let Some(caller) = nodes
917 .iter()
918 .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
919 .rev()
920 .find(|n| n.start_line <= line)
921 {
922 refs.push(unresolved(
923 &caller.id,
924 name,
925 EdgeKind::Calls,
926 file_path,
927 language,
928 line,
929 ));
930 }
931 }
932}
933
934fn make_node(
935 file_path: &str,
936 language: Language,
937 kind: NodeKind,
938 name: &str,
939 line: i64,
940 col: i64,
941 now: i64,
942 signature: Option<String>,
943) -> Node {
944 Node {
945 id: format!("{}:{}:{}:{}", kind.as_str(), file_path, name, line),
946 kind,
947 name: name.to_string(),
948 qualified_name: name.to_string(),
949 file_path: file_path.to_string(),
950 language,
951 start_line: line,
952 end_line: line,
953 start_column: col,
954 end_column: col,
955 docstring: None,
956 signature,
957 visibility: None,
958 is_exported: false,
959 is_async: false,
960 is_static: false,
961 is_abstract: false,
962 updated_at: now,
963 }
964}
965
966fn make_node_span(
967 file_path: &str,
968 language: Language,
969 kind: NodeKind,
970 name: &str,
971 node: SyntaxNode,
972 now: i64,
973 signature: Option<String>,
974) -> Node {
975 let start = node.start_position();
976 let end = node.end_position();
977 Node {
978 id: format!("{}:{}:{}:{}", kind.as_str(), file_path, name, start.row + 1),
979 kind,
980 name: name.to_string(),
981 qualified_name: name.to_string(),
982 file_path: file_path.to_string(),
983 language,
984 start_line: start.row as i64 + 1,
985 end_line: end.row as i64 + 1,
986 start_column: start.column as i64,
987 end_column: end.column as i64,
988 docstring: None,
989 signature,
990 visibility: None,
991 is_exported: false,
992 is_async: false,
993 is_static: false,
994 is_abstract: false,
995 updated_at: now,
996 }
997}
998
999fn add_contains(nodes: &[Node], edges: &mut Vec<Edge>, node: &Node) {
1000 if let Some(file) = nodes.first() {
1001 edges.push(Edge {
1002 id: None,
1003 source: file.id.clone(),
1004 target: node.id.clone(),
1005 kind: EdgeKind::Contains,
1006 line: None,
1007 col: None,
1008 provenance: Some("regex".into()),
1009 });
1010 }
1011}
1012
1013fn add_contains_from_stack(
1014 nodes: &[Node],
1015 edges: &mut Vec<Edge>,
1016 stack: &[String],
1017 node: &Node,
1018 provenance: &str,
1019) {
1020 let source = stack
1021 .last()
1022 .cloned()
1023 .or_else(|| nodes.first().map(|n| n.id.clone()));
1024 if let Some(source) = source {
1025 edges.push(Edge {
1026 id: None,
1027 source,
1028 target: node.id.clone(),
1029 kind: EdgeKind::Contains,
1030 line: None,
1031 col: None,
1032 provenance: Some(provenance.into()),
1033 });
1034 }
1035}
1036
1037fn unresolved(
1038 from: &str,
1039 name: &str,
1040 kind: EdgeKind,
1041 file_path: &str,
1042 language: Language,
1043 line: i64,
1044) -> UnresolvedReference {
1045 UnresolvedReference {
1046 from_node_id: from.to_string(),
1047 reference_name: name.to_string(),
1048 reference_kind: kind,
1049 line,
1050 column: 0,
1051 file_path: file_path.to_string(),
1052 language,
1053 }
1054}
1055
1056fn refs_push(
1057 refs: &mut Vec<UnresolvedReference>,
1058 from: &str,
1059 name: &str,
1060 kind: EdgeKind,
1061 file_path: &str,
1062 language: Language,
1063 line: i64,
1064 column: i64,
1065) {
1066 if !name.is_empty() {
1067 refs.push(UnresolvedReference {
1068 from_node_id: from.to_string(),
1069 reference_name: name.to_string(),
1070 reference_kind: kind,
1071 line,
1072 column,
1073 file_path: file_path.to_string(),
1074 language,
1075 });
1076 }
1077}
1078
1079fn named_children(node: SyntaxNode) -> Vec<SyntaxNode> {
1080 (0..node.named_child_count())
1081 .filter_map(|i| node.named_child(i as u32))
1082 .collect()
1083}
1084
1085fn node_text<'a>(node: SyntaxNode, source: &'a str) -> &'a str {
1086 source.get(node.byte_range()).unwrap_or_default()
1087}
1088
1089fn child_text_by_kind<'a>(node: SyntaxNode, source: &'a str, kinds: &[&str]) -> Option<&'a str> {
1090 named_children(node)
1091 .into_iter()
1092 .find(|child| kinds.contains(&child.kind()))
1093 .map(|child| node_text(child, source))
1094}
1095
1096fn descendant_text_by_kind<'a>(
1097 node: SyntaxNode,
1098 source: &'a str,
1099 kinds: &[&str],
1100) -> Option<&'a str> {
1101 if kinds.contains(&node.kind()) {
1102 return Some(node_text(node, source));
1103 }
1104 for child in named_children(node) {
1105 if let Some(text) = descendant_text_by_kind(child, source, kinds) {
1106 return Some(text);
1107 }
1108 }
1109 None
1110}
1111
1112fn rust_node_name(node: SyntaxNode, source: &str, kind: NodeKind) -> Option<String> {
1113 if kind == NodeKind::Import {
1114 return rust_import_root(node, source);
1115 }
1116 if kind == NodeKind::Variable && node.kind() == "let_declaration" {
1117 return descendant_text_by_kind(node, source, &["identifier"]).map(clean_symbol_name);
1118 }
1119 if kind == NodeKind::Field {
1120 return child_text_by_kind(node, source, &["field_identifier", "identifier"])
1121 .map(clean_symbol_name);
1122 }
1123 node.child_by_field_name("name")
1124 .map(|n| clean_symbol_name(node_text(n, source)))
1125 .or_else(|| {
1126 child_text_by_kind(
1127 node,
1128 source,
1129 &["identifier", "type_identifier", "field_identifier"],
1130 )
1131 .map(clean_symbol_name)
1132 })
1133}
1134
1135fn rust_is_public(node: SyntaxNode, source: &str) -> bool {
1136 node_text(node, source).trim_start().starts_with("pub")
1137 || named_children(node).into_iter().any(|child| {
1138 child.kind() == "visibility_modifier" && node_text(child, source).contains("pub")
1139 })
1140}
1141
1142fn rust_receiver_type(node: SyntaxNode, source: &str) -> Option<String> {
1143 let mut parent = node.parent();
1144 while let Some(p) = parent {
1145 if p.kind() == "impl_item" {
1146 let mut direct = named_children(p)
1147 .into_iter()
1148 .filter(|child| {
1149 matches!(
1150 child.kind(),
1151 "type_identifier" | "generic_type" | "scoped_type_identifier"
1152 )
1153 })
1154 .collect::<Vec<_>>();
1155 if let Some(last) = direct.pop() {
1156 return Some(clean_type_name(node_text(last, source)));
1157 }
1158 return descendant_text_by_kind(p, source, &["type_identifier"]).map(clean_type_name);
1159 }
1160 parent = p.parent();
1161 }
1162 None
1163}
1164
1165fn rust_impl_trait_for_type(node: SyntaxNode, source: &str) -> Option<(String, String)> {
1166 if node.kind() != "impl_item" || !node_text(node, source).contains(" for ") {
1167 return None;
1168 }
1169 let names: Vec<String> = named_children(node)
1170 .into_iter()
1171 .filter(|child| {
1172 matches!(
1173 child.kind(),
1174 "type_identifier" | "generic_type" | "scoped_type_identifier"
1175 )
1176 })
1177 .map(|child| clean_type_name(node_text(child, source)))
1178 .collect();
1179 if names.len() >= 2 {
1180 Some((names[0].clone(), names[names.len() - 1].clone()))
1181 } else {
1182 None
1183 }
1184}
1185
1186fn rust_import_root(node: SyntaxNode, source: &str) -> Option<String> {
1187 let text = node_text(node, source)
1188 .trim()
1189 .strip_prefix("use")
1190 .unwrap_or(node_text(node, source))
1191 .trim()
1192 .trim_end_matches(';')
1193 .trim();
1194 text.split("::")
1195 .next()
1196 .map(|s| s.trim_matches('{').trim().to_string())
1197 .filter(|s| !s.is_empty())
1198}
1199
1200fn callable_name(node: SyntaxNode, source: &str) -> Option<String> {
1201 match node.kind() {
1202 "identifier" | "field_identifier" => Some(clean_symbol_name(node_text(node, source))),
1203 "scoped_identifier" => node_text(node, source)
1204 .rsplit("::")
1205 .next()
1206 .map(clean_symbol_name),
1207 "field_expression" => node
1208 .child_by_field_name("field")
1209 .map(|field| clean_symbol_name(node_text(field, source))),
1210 "generic_function" => named_children(node)
1211 .into_iter()
1212 .find_map(|child| callable_name(child, source)),
1213 _ => None,
1214 }
1215}
1216
1217fn moonbit_node_name(node: SyntaxNode, source: &str, kind: NodeKind) -> Option<String> {
1218 match kind {
1219 NodeKind::Function | NodeKind::Method => child_text_by_kind(
1220 node,
1221 source,
1222 &["function_identifier", "lowercase_identifier", "identifier"],
1223 )
1224 .map(|s| clean_symbol_name(s.rsplit("::").next().unwrap_or(s))),
1225 NodeKind::Struct | NodeKind::Trait | NodeKind::Enum => child_text_by_kind(
1226 node,
1227 source,
1228 &[
1229 "identifier",
1230 "type_identifier",
1231 "type_name",
1232 "uppercase_identifier",
1233 ],
1234 )
1235 .map(clean_symbol_name),
1236 NodeKind::EnumMember => child_text_by_kind(
1237 node,
1238 source,
1239 &["uppercase_identifier", "identifier", "type_name"],
1240 )
1241 .map(clean_symbol_name),
1242 NodeKind::TypeAlias => descendant_text_by_kind(
1243 node,
1244 source,
1245 &[
1246 "type_identifier",
1247 "type_name",
1248 "identifier",
1249 "uppercase_identifier",
1250 ],
1251 )
1252 .map(clean_symbol_name),
1253 NodeKind::Constant => {
1254 child_text_by_kind(node, source, &["uppercase_identifier", "identifier"])
1255 .map(clean_symbol_name)
1256 }
1257 NodeKind::Import => moonbit_import_name(node, source),
1258 NodeKind::Module => node
1259 .named_child(0)
1260 .map(|child| clean_quoted(node_text(child, source))),
1261 _ => None,
1262 }
1263}
1264
1265fn moonbit_is_public(node: SyntaxNode, source: &str) -> bool {
1266 named_children(node)
1267 .into_iter()
1268 .any(|child| child.kind() == "visibility" && node_text(child, source).contains("pub"))
1269 || node_text(node, source).trim_start().starts_with("pub ")
1270}
1271
1272fn moonbit_impl_owner(node: SyntaxNode, source: &str) -> Option<String> {
1273 child_text_by_kind(
1274 node,
1275 source,
1276 &["type_name", "type_identifier", "qualified_type_identifier"],
1277 )
1278 .map(clean_type_name)
1279}
1280
1281fn moonbit_import_name(node: SyntaxNode, source: &str) -> Option<String> {
1282 if node.kind() == "import_declaration" {
1283 return named_children(node)
1284 .into_iter()
1285 .find(|child| child.kind() == "import_item")
1286 .and_then(|child| moonbit_import_name(child, source));
1287 }
1288 named_children(node)
1289 .into_iter()
1290 .find(|child| child.kind() == "string_literal")
1291 .map(|child| clean_quoted(node_text(child, source)))
1292}
1293
1294fn moonbit_call_name(node: SyntaxNode, source: &str) -> Option<String> {
1295 for child in named_children(node) {
1296 match child.kind() {
1297 "qualified_identifier" | "function_identifier" | "method_expression" => {
1298 let text = node_text(child, source);
1299 let name = text
1300 .rsplit(['.', ':'])
1301 .find(|part| !part.is_empty())
1302 .unwrap_or(text);
1303 return Some(clean_symbol_name(name));
1304 }
1305 "lowercase_identifier" | "identifier" => {
1306 return Some(clean_symbol_name(node_text(child, source)));
1307 }
1308 _ => {}
1309 }
1310 }
1311 None
1312}
1313
1314fn enclosing_callable(nodes: &[Node], line: i64) -> Option<&Node> {
1315 nodes
1316 .iter()
1317 .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
1318 .filter(|n| n.start_line <= line && line <= n.end_line.max(n.start_line))
1319 .min_by_key(|n| n.end_line - n.start_line)
1320}
1321
/// Normalises a symbol name: trims whitespace, strips surrounding double
/// quotes, then surrounding single quotes, then any leading dots.
fn clean_symbol_name(s: &str) -> String {
    let trimmed = s.trim();
    let without_double_quotes = trimmed.trim_matches('"');
    let without_quotes = without_double_quotes.trim_matches('\'');
    String::from(without_quotes.trim_start_matches('.'))
}
1329
/// Trims whitespace, then strips surrounding double quotes and, after that,
/// surrounding single quotes.
fn clean_quoted(s: &str) -> String {
    let trimmed = s.trim();
    let without_double_quotes = trimmed.trim_matches('"');
    String::from(without_double_quotes.trim_matches('\''))
}
1333
/// Reduces a type expression to its bare name: everything from the first `<`
/// onwards is dropped, then only the segment after the last `::` is kept.
fn clean_type_name(s: &str) -> String {
    let trimmed = s.trim();
    let base = match trimmed.find('<') {
        Some(generics_start) => &trimmed[..generics_start],
        None => trimmed,
    };
    let last_segment = match base.rfind("::") {
        Some(separator) => &base[separator + 2..],
        None => base,
    };
    last_segment.trim().to_string()
}
1344
/// Returns the 1-based line number of byte offset `idx` in `source`.
///
/// Offsets past the end are clamped to the end of `source`. Counting is done
/// on bytes, so an offset that falls inside a multi-byte character is handled
/// without panicking.
fn line_for(source: &str, idx: usize) -> i64 {
    let end = idx.min(source.len());
    let newlines_before = source.as_bytes()[..end]
        .iter()
        .filter(|&&byte| byte == b'\n')
        .count();
    newlines_before as i64 + 1
}
1352
/// Blanks out everything in a Markdown document except the contents of code
/// fences whose opening line mentions `mbt`.
///
/// Each input line produces exactly one output line, so line numbers in the
/// result match the original document. Fence lines themselves become empty.
fn extract_mbt_markdown_code_with_padding(source: &str) -> String {
    let mut padded = String::with_capacity(source.len());
    let mut inside_mbt_fence = false;

    for line in source.lines() {
        let content = line.trim_start();
        if content.starts_with("```") {
            // A closing fence has no `mbt` in it, which ends the block.
            inside_mbt_fence = content.contains("mbt");
        } else if inside_mbt_fence {
            padded.push_str(line);
        }
        padded.push('\n');
    }
    padded
}
1370
/// Current wall-clock time in milliseconds since the Unix epoch, or 0 when
/// the system clock reports a time before the epoch.
fn now_ms() -> i64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}