1use crate::config::CodeGraphConfig;
2use crate::types::*;
3use regex::Regex;
4use std::path::Path;
5use tree_sitter::{Node as SyntaxNode, Parser};
6
7type ExtractorFn = fn(
8 file_path: &str,
9 source: &str,
10 language: Language,
11 now: i64,
12 nodes: &mut Vec<Node>,
13 edges: &mut Vec<Edge>,
14 refs: &mut Vec<UnresolvedReference>,
15);
16
17#[derive(Clone, Copy)]
18struct LanguageExtractor {
19 name: &'static str,
20 languages: &'static [Language],
21 extract: ExtractorFn,
22}
23
24const RUST_LANGUAGES: &[Language] = &[Language::Rust];
25const MOONBIT_LANGUAGES: &[Language] = &[Language::MoonBit];
26const PYTHON_LANGUAGES: &[Language] = &[Language::Python];
27const GO_LANGUAGES: &[Language] = &[Language::Go];
28const JAVA_KOTLIN_LANGUAGES: &[Language] = &[Language::Java, Language::Kotlin];
29const CSHARP_LANGUAGES: &[Language] = &[Language::CSharp];
30const PHP_RUBY_LANGUAGES: &[Language] = &[Language::Php, Language::Ruby];
31const SWIFT_LANGUAGES: &[Language] = &[Language::Swift];
32const DART_PASCAL_SCALA_LANGUAGES: &[Language] =
33 &[Language::Dart, Language::Pascal, Language::Scala];
34const LIQUID_VUE_SVELTE_LANGUAGES: &[Language] =
35 &[Language::Liquid, Language::Vue, Language::Svelte];
36const TYPESCRIPT_JAVASCRIPT_LANGUAGES: &[Language] = &[
37 Language::TypeScript,
38 Language::Tsx,
39 Language::JavaScript,
40 Language::Jsx,
41];
42const GENERIC_LANGUAGES: &[Language] = &[Language::C, Language::Cpp, Language::Unknown];
43
44const LANGUAGE_EXTRACTORS: &[LanguageExtractor] = &[
45 LanguageExtractor {
46 name: "rust",
47 languages: RUST_LANGUAGES,
48 extract: extract_rust_entry,
49 },
50 LanguageExtractor {
51 name: "moonbit",
52 languages: MOONBIT_LANGUAGES,
53 extract: extract_moonbit_entry,
54 },
55 LanguageExtractor {
56 name: "typescript_javascript",
57 languages: TYPESCRIPT_JAVASCRIPT_LANGUAGES,
58 extract: extract_typescript_javascript_entry,
59 },
60 LanguageExtractor {
61 name: "python",
62 languages: PYTHON_LANGUAGES,
63 extract: extract_python_entry,
64 },
65 LanguageExtractor {
66 name: "go",
67 languages: GO_LANGUAGES,
68 extract: extract_go_entry,
69 },
70 LanguageExtractor {
71 name: "java_kotlin",
72 languages: JAVA_KOTLIN_LANGUAGES,
73 extract: extract_java_kotlin_entry,
74 },
75 LanguageExtractor {
76 name: "csharp",
77 languages: CSHARP_LANGUAGES,
78 extract: extract_csharp_entry,
79 },
80 LanguageExtractor {
81 name: "php_ruby",
82 languages: PHP_RUBY_LANGUAGES,
83 extract: extract_php_ruby_entry,
84 },
85 LanguageExtractor {
86 name: "swift",
87 languages: SWIFT_LANGUAGES,
88 extract: extract_swift_entry,
89 },
90 LanguageExtractor {
91 name: "dart_pascal_scala",
92 languages: DART_PASCAL_SCALA_LANGUAGES,
93 extract: extract_dart_pascal_scala_entry,
94 },
95 LanguageExtractor {
96 name: "liquid_vue_svelte",
97 languages: LIQUID_VUE_SVELTE_LANGUAGES,
98 extract: extract_liquid_vue_svelte_entry,
99 },
100 LanguageExtractor {
101 name: "generic",
102 languages: GENERIC_LANGUAGES,
103 extract: extract_generic_entry,
104 },
105];
106
107pub fn should_include_file(path: &Path, config: &CodeGraphConfig) -> bool {
108 let s = path.to_string_lossy().replace('\\', "/");
109 if s.starts_with(".codegraph/") {
110 return false;
111 }
112 if config.exclude.iter().any(|p| glob_match(p, &s)) {
113 return false;
114 }
115 config.include.iter().any(|p| glob_match(p, &s))
116}
117
/// Matches `path` against the small glob dialect used by the include/exclude
/// lists. `path` is expected to use `/` separators.
///
/// Supported forms, tried in order:
/// * `**/*.ext`  — any path ending in `.ext`;
/// * `**/dir/**` — `dir` itself, or any path that has `dir` as a complete
///   leading or interior run of components;
/// * `**/name`   — `name` itself, or any path whose trailing components are
///   exactly `name`;
/// * anything else — exact string equality.
fn glob_match(pattern: &str, path: &str) -> bool {
    if let Some(ext) = pattern.strip_prefix("**/*.") {
        return path
            .strip_suffix(ext)
            .is_some_and(|stem| stem.ends_with('.'));
    }
    if let Some(dir) = pattern
        .strip_prefix("**/")
        .and_then(|rest| rest.strip_suffix("/**"))
    {
        // Anchor on component boundaries: a plain substring test would let
        // `**/build/**` match `src/rebuild/x.rs`.
        let leading = format!("{dir}/");
        let interior = format!("/{dir}/");
        return path == dir || path.starts_with(&leading) || path.contains(&interior);
    }
    if let Some(name) = pattern.strip_prefix("**/") {
        // The text before `name` must be empty or end at a separator, so
        // `**/Makefile` does not match `GNUMakefile`.
        return path
            .strip_suffix(name)
            .is_some_and(|parent| parent.is_empty() || parent.ends_with('/'));
    }
    pattern == path
}
134
135pub fn detect_language(path: &Path, _source: &str) -> Language {
136 let name = path
137 .file_name()
138 .and_then(|s| s.to_str())
139 .unwrap_or_default()
140 .to_lowercase();
141 if name == "moon.mod.json" || name == "moon.pkg.json" || name == "moon.pkg" {
142 return Language::MoonBit;
143 }
144 if name.ends_with(".mbt.md") {
145 return Language::MoonBit;
146 }
147 match path
148 .extension()
149 .and_then(|s| s.to_str())
150 .unwrap_or_default()
151 .to_lowercase()
152 .as_str()
153 {
154 "ts" => Language::TypeScript,
155 "tsx" => Language::Tsx,
156 "js" | "mjs" | "cjs" => Language::JavaScript,
157 "jsx" => Language::Jsx,
158 "py" | "pyw" => Language::Python,
159 "go" => Language::Go,
160 "rs" => Language::Rust,
161 "java" => Language::Java,
162 "c" | "h" => Language::C,
163 "cpp" | "cc" | "cxx" | "hpp" | "hxx" => Language::Cpp,
164 "cs" => Language::CSharp,
165 "php" => Language::Php,
166 "rb" | "rake" => Language::Ruby,
167 "swift" => Language::Swift,
168 "kt" | "kts" => Language::Kotlin,
169 "dart" => Language::Dart,
170 "svelte" => Language::Svelte,
171 "vue" => Language::Vue,
172 "liquid" => Language::Liquid,
173 "pas" | "dpr" | "dpk" | "lpr" | "dfm" | "fmx" => Language::Pascal,
174 "scala" | "sc" => Language::Scala,
175 "mbt" | "mbti" => Language::MoonBit,
176 _ => Language::Unknown,
177 }
178}
179
180pub fn detect_parse_error(source: &str, language: Language) -> bool {
181 match language {
182 Language::Rust => tree_sitter_has_error(source, tree_sitter_rust::LANGUAGE.into()),
183 _ => false,
184 }
185}
186
187fn tree_sitter_has_error(source: &str, language: tree_sitter::Language) -> bool {
188 let mut parser = Parser::new();
189 if parser.set_language(&language).is_err() {
190 return false;
191 }
192 parser
193 .parse(source, None)
194 .map(|tree| tree.root_node().has_error())
195 .unwrap_or(false)
196}
197
198pub fn extract_from_source(path: &Path, source: &str, language: Language) -> ExtractionResult {
199 let file_path = path.to_string_lossy().replace('\\', "/");
200 let now = now_ms();
201 let mut nodes = vec![Node {
202 id: format!("file:{}", file_path),
203 kind: NodeKind::File,
204 name: path
205 .file_name()
206 .and_then(|s| s.to_str())
207 .unwrap_or(&file_path)
208 .to_string(),
209 qualified_name: file_path.clone(),
210 file_path: file_path.clone(),
211 language,
212 start_line: 1,
213 end_line: source.lines().count().max(1) as i64,
214 start_column: 0,
215 end_column: 0,
216 docstring: None,
217 signature: None,
218 visibility: None,
219 is_exported: false,
220 is_async: false,
221 is_static: false,
222 is_abstract: false,
223 updated_at: now,
224 }];
225 let mut edges = Vec::new();
226 let mut refs = Vec::new();
227
228 let extractor = extractor_for_language(language);
229 (extractor.extract)(
230 &file_path, &source, language, now, &mut nodes, &mut edges, &mut refs,
231 );
232
233 ExtractionResult {
234 nodes,
235 edges,
236 unresolved_references: refs,
237 }
238}
239
240pub fn registered_extractor_name(language: Language) -> &'static str {
241 extractor_for_language(language).name
242}
243
244fn extractor_for_language(language: Language) -> LanguageExtractor {
245 LANGUAGE_EXTRACTORS
246 .iter()
247 .copied()
248 .find(|extractor| extractor.languages.contains(&language))
249 .unwrap_or(LanguageExtractor {
250 name: "generic",
251 languages: &[],
252 extract: extract_generic_entry,
253 })
254}
255
256fn extract_rust_entry(
257 file_path: &str,
258 source: &str,
259 _language: Language,
260 now: i64,
261 nodes: &mut Vec<Node>,
262 edges: &mut Vec<Edge>,
263 refs: &mut Vec<UnresolvedReference>,
264) {
265 extract_rust(file_path, source, now, nodes, edges, refs);
266}
267
268fn extract_moonbit_entry(
269 file_path: &str,
270 source: &str,
271 _language: Language,
272 now: i64,
273 nodes: &mut Vec<Node>,
274 edges: &mut Vec<Edge>,
275 refs: &mut Vec<UnresolvedReference>,
276) {
277 extract_moonbit(file_path, source, now, nodes, edges, refs);
278}
279
280fn extract_typescript_javascript_entry(
281 file_path: &str,
282 source: &str,
283 language: Language,
284 now: i64,
285 nodes: &mut Vec<Node>,
286 edges: &mut Vec<Edge>,
287 refs: &mut Vec<UnresolvedReference>,
288) {
289 extract_typescript_javascript(file_path, source, language, now, nodes, edges, refs);
290}
291
292fn extract_python_entry(
293 file_path: &str,
294 source: &str,
295 _language: Language,
296 now: i64,
297 nodes: &mut Vec<Node>,
298 edges: &mut Vec<Edge>,
299 refs: &mut Vec<UnresolvedReference>,
300) {
301 extract_python(file_path, source, now, nodes, edges, refs);
302}
303
304fn extract_go_entry(
305 file_path: &str,
306 source: &str,
307 _language: Language,
308 now: i64,
309 nodes: &mut Vec<Node>,
310 edges: &mut Vec<Edge>,
311 refs: &mut Vec<UnresolvedReference>,
312) {
313 extract_go(file_path, source, now, nodes, edges, refs);
314}
315
316fn extract_java_kotlin_entry(
317 file_path: &str,
318 source: &str,
319 language: Language,
320 now: i64,
321 nodes: &mut Vec<Node>,
322 edges: &mut Vec<Edge>,
323 refs: &mut Vec<UnresolvedReference>,
324) {
325 extract_java_kotlin(file_path, source, language, now, nodes, edges, refs);
326}
327
328fn extract_csharp_entry(
329 file_path: &str,
330 source: &str,
331 _language: Language,
332 now: i64,
333 nodes: &mut Vec<Node>,
334 edges: &mut Vec<Edge>,
335 refs: &mut Vec<UnresolvedReference>,
336) {
337 extract_csharp(file_path, source, now, nodes, edges, refs);
338}
339
340fn extract_php_ruby_entry(
341 file_path: &str,
342 source: &str,
343 language: Language,
344 now: i64,
345 nodes: &mut Vec<Node>,
346 edges: &mut Vec<Edge>,
347 refs: &mut Vec<UnresolvedReference>,
348) {
349 extract_php_ruby(file_path, source, language, now, nodes, edges, refs);
350}
351
352fn extract_swift_entry(
353 file_path: &str,
354 source: &str,
355 _language: Language,
356 now: i64,
357 nodes: &mut Vec<Node>,
358 edges: &mut Vec<Edge>,
359 refs: &mut Vec<UnresolvedReference>,
360) {
361 extract_swift(file_path, source, now, nodes, edges, refs);
362}
363
364fn extract_dart_pascal_scala_entry(
365 file_path: &str,
366 source: &str,
367 language: Language,
368 now: i64,
369 nodes: &mut Vec<Node>,
370 edges: &mut Vec<Edge>,
371 refs: &mut Vec<UnresolvedReference>,
372) {
373 extract_dart_pascal_scala(file_path, source, language, now, nodes, edges, refs);
374}
375
376fn extract_liquid_vue_svelte_entry(
377 file_path: &str,
378 source: &str,
379 language: Language,
380 now: i64,
381 nodes: &mut Vec<Node>,
382 edges: &mut Vec<Edge>,
383 refs: &mut Vec<UnresolvedReference>,
384) {
385 extract_liquid_vue_svelte(file_path, source, language, now, nodes, edges, refs);
386}
387
388fn extract_generic_entry(
389 file_path: &str,
390 source: &str,
391 language: Language,
392 now: i64,
393 nodes: &mut Vec<Node>,
394 edges: &mut Vec<Edge>,
395 refs: &mut Vec<UnresolvedReference>,
396) {
397 extract_generic(file_path, source, language, now, nodes, edges, refs);
398}
399
400fn extract_typescript_javascript(
401 file_path: &str,
402 source: &str,
403 language: Language,
404 now: i64,
405 nodes: &mut Vec<Node>,
406 edges: &mut Vec<Edge>,
407 refs: &mut Vec<UnresolvedReference>,
408) {
409 add_regex_nodes(
410 file_path,
411 source,
412 language,
413 now,
414 nodes,
415 edges,
416 r"(?m)^\s*(export\s+)?(?:async\s+)?function\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*([^{;]*)",
417 NodeKind::Function,
418 );
419 add_regex_nodes(
420 file_path,
421 source,
422 language,
423 now,
424 nodes,
425 edges,
426 r"(?m)^\s*(export\s+)?class\s+([A-Za-z_$][A-Za-z0-9_$]*)",
427 NodeKind::Class,
428 );
429 add_regex_nodes(
430 file_path,
431 source,
432 language,
433 now,
434 nodes,
435 edges,
436 r"(?m)^\s*(export\s+)?interface\s+([A-Za-z_$][A-Za-z0-9_$]*)",
437 NodeKind::Interface,
438 );
439 add_regex_nodes(
440 file_path,
441 source,
442 language,
443 now,
444 nodes,
445 edges,
446 r"(?m)^\s*(export\s+)?type\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*=",
447 NodeKind::TypeAlias,
448 );
449 add_ts_js_arrow_functions(file_path, source, language, now, nodes, edges);
450 add_ts_js_imports(file_path, source, language, now, nodes, edges, refs);
451 add_tsx_jsx_components(file_path, language, now, nodes, edges);
452 extract_web_file_routes(file_path, language, now, nodes, edges, refs);
453 extract_ts_js_framework_routes(file_path, source, language, now, nodes, edges, refs);
454 add_call_refs(
455 file_path,
456 source,
457 language,
458 nodes,
459 refs,
460 r"([A-Za-z_$][A-Za-z0-9_$.]*)\s*\(",
461 );
462}
463
464fn extract_ts_js_framework_routes(
465 file_path: &str,
466 source: &str,
467 language: Language,
468 now: i64,
469 nodes: &mut Vec<Node>,
470 edges: &mut Vec<Edge>,
471 refs: &mut Vec<UnresolvedReference>,
472) {
473 let express_re = Regex::new(
474 r#"(?:app|router|server)\.(get|post|put|patch|delete|all)\s*\(\s*['"`]([^'"`]+)['"`]\s*,\s*(?:[A-Za-z_$][A-Za-z0-9_$]*\s*,\s*)*([A-Za-z_$][A-Za-z0-9_$]*)"#,
475 )
476 .unwrap();
477 for cap in express_re.captures_iter(source) {
478 let method = cap.get(1).unwrap().as_str().to_ascii_uppercase();
479 let path_match = cap.get(2).unwrap();
480 let handler = cap.get(3).map(|m| m.as_str());
481 add_framework_route_node(
482 file_path,
483 language,
484 now,
485 nodes,
486 edges,
487 refs,
488 &method,
489 path_match.as_str(),
490 handler,
491 line_for(source, path_match.start()),
492 Some(cap.get(0).unwrap().as_str().trim().to_string()),
493 "web-framework",
494 );
495 }
496
497 let react_router_re = Regex::new(
498 r#"<Route\b[^>]*\bpath\s*=\s*["']([^"']+)["'][^>]*(?:\belement\s*=\s*\{\s*<\s*([A-Z][A-Za-z0-9_$]*)|\bComponent\s*=\s*\{\s*([A-Z][A-Za-z0-9_$]*))"#,
499 )
500 .unwrap();
501 for cap in react_router_re.captures_iter(source) {
502 let path_match = cap.get(1).unwrap();
503 let handler = cap.get(2).or_else(|| cap.get(3)).map(|m| m.as_str());
504 add_framework_route_node(
505 file_path,
506 language,
507 now,
508 nodes,
509 edges,
510 refs,
511 "PAGE",
512 path_match.as_str(),
513 handler,
514 line_for(source, path_match.start()),
515 Some(cap.get(0).unwrap().as_str().trim().to_string()),
516 "web-framework",
517 );
518 }
519}
520
521fn add_ts_js_arrow_functions(
522 file_path: &str,
523 source: &str,
524 language: Language,
525 now: i64,
526 nodes: &mut Vec<Node>,
527 edges: &mut Vec<Edge>,
528) {
529 let re = Regex::new(
530 r"(?m)^\s*(export\s+)?const\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*(?::[^=]+)?=\s*(?:async\s+)?(?:\([^)]*\)|[A-Za-z_$][A-Za-z0-9_$]*)(?:\s*:\s*[^=;\n]+)?\s*=>",
531 )
532 .unwrap();
533 for cap in re.captures_iter(source) {
534 let name_match = cap.get(2).unwrap();
535 let mut node = make_node(
536 file_path,
537 language,
538 NodeKind::Function,
539 name_match.as_str(),
540 line_for(source, name_match.start()),
541 0,
542 now,
543 cap.get(0).map(|m| m.as_str().trim().to_string()),
544 );
545 node.is_exported = cap.get(1).is_some();
546 node.visibility = node.is_exported.then(|| "public".to_string());
547 node.is_async = cap
548 .get(0)
549 .map(|m| m.as_str().contains("async"))
550 .unwrap_or(false);
551 add_contains(nodes, edges, &node);
552 nodes.push(node);
553 }
554}
555
556fn add_ts_js_imports(
557 file_path: &str,
558 source: &str,
559 language: Language,
560 now: i64,
561 nodes: &mut Vec<Node>,
562 edges: &mut Vec<Edge>,
563 refs: &mut Vec<UnresolvedReference>,
564) {
565 let re =
566 Regex::new(r#"(?m)^\s*import(?:\s+type)?(?:\s+[^;\n]*?\s+from)?\s+['"]([^'"]+)['"]\s*;?"#)
567 .unwrap();
568 for cap in re.captures_iter(source) {
569 let module = cap.get(1).unwrap();
570 let signature = cap.get(0).unwrap().as_str().trim().to_string();
571 let node = make_node(
572 file_path,
573 language,
574 NodeKind::Import,
575 module.as_str(),
576 line_for(source, module.start()),
577 0,
578 now,
579 Some(signature),
580 );
581 add_contains(nodes, edges, &node);
582 refs.push(unresolved(
583 &nodes[0].id,
584 module.as_str(),
585 EdgeKind::Imports,
586 file_path,
587 language,
588 node.start_line,
589 ));
590 nodes.push(node);
591 }
592}
593
594fn add_tsx_jsx_components(
595 file_path: &str,
596 language: Language,
597 now: i64,
598 nodes: &mut Vec<Node>,
599 edges: &mut Vec<Edge>,
600) {
601 if !matches!(language, Language::Tsx | Language::Jsx) {
602 return;
603 }
604 let component_names: Vec<(String, i64, bool, Option<String>)> = nodes
605 .iter()
606 .filter(|node| matches!(node.kind, NodeKind::Function | NodeKind::Class))
607 .filter(|node| node.name.chars().next().is_some_and(char::is_uppercase))
608 .map(|node| {
609 (
610 node.name.clone(),
611 node.start_line,
612 node.is_exported,
613 node.signature.clone(),
614 )
615 })
616 .collect();
617 for (name, line, is_exported, signature) in component_names {
618 let mut node = make_node(
619 file_path,
620 language,
621 NodeKind::Component,
622 &name,
623 line,
624 0,
625 now,
626 signature,
627 );
628 node.is_exported = is_exported;
629 node.visibility = node.is_exported.then(|| "public".to_string());
630 add_contains(nodes, edges, &node);
631 nodes.push(node);
632 }
633}
634
635fn extract_python(
636 file_path: &str,
637 source: &str,
638 now: i64,
639 nodes: &mut Vec<Node>,
640 edges: &mut Vec<Edge>,
641 refs: &mut Vec<UnresolvedReference>,
642) {
643 let class_re =
644 Regex::new(r"^([ \t]*)class\s+([A-Za-z_][A-Za-z0-9_]*)(?:\s*\([^)]*\))?\s*:").unwrap();
645 let def_re = Regex::new(
646 r"^([ \t]*)(async\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)\s*\([^)]*\)\s*(?:->\s*[^:]+)?\s*:",
647 )
648 .unwrap();
649 let decorator_re = Regex::new(r"^([ \t]*)@([A-Za-z_][A-Za-z0-9_\.]*(?:\([^)]*\))?)").unwrap();
650
651 let mut class_stack: Vec<(usize, String, String)> = Vec::new();
652 let mut pending_decorators: Vec<(usize, String, i64)> = Vec::new();
653
654 for (line_idx, line) in source.lines().enumerate() {
655 let line_no = line_idx as i64 + 1;
656 let trimmed = line.trim();
657 if trimmed.is_empty() || trimmed.starts_with('#') {
658 continue;
659 }
660
661 let indent = python_indent_width(line);
662 while class_stack.last().is_some_and(|(class_indent, _, _)| {
663 indent <= *class_indent && !trimmed.starts_with('@')
664 }) {
665 class_stack.pop();
666 }
667 pending_decorators.retain(|(decorator_indent, _, _)| *decorator_indent == indent);
668
669 if let Some(cap) = decorator_re.captures(line) {
670 pending_decorators.push((indent, cap[2].to_string(), line_no));
671 continue;
672 }
673
674 if let Some(cap) = class_re.captures(line) {
675 let name = cap[2].to_string();
676 let node = make_node(
677 file_path,
678 Language::Python,
679 NodeKind::Class,
680 &name,
681 line_no,
682 indent as i64,
683 now,
684 Some(trimmed.to_string()),
685 );
686 add_contains(nodes, edges, &node);
687 class_stack.push((indent, name, node.id.clone()));
688 nodes.push(node);
689 pending_decorators.clear();
690 continue;
691 }
692
693 if let Some(cap) = def_re.captures(line) {
694 let name = cap[3].to_string();
695 for (_, decorator, decorator_line) in &pending_decorators {
696 if let Some((method, path)) = python_route_decorator(decorator) {
697 add_framework_route_node(
698 file_path,
699 Language::Python,
700 now,
701 nodes,
702 edges,
703 refs,
704 method,
705 path,
706 Some(&name),
707 *decorator_line,
708 Some(format!("@{decorator}")),
709 "web-framework",
710 );
711 }
712 }
713 let parent_class = class_stack
714 .iter()
715 .rev()
716 .find(|(class_indent, _, _)| indent > *class_indent);
717 let kind = if parent_class.is_some() {
718 NodeKind::Method
719 } else {
720 NodeKind::Function
721 };
722 let mut signature_lines: Vec<String> = pending_decorators
723 .iter()
724 .map(|(_, decorator, _)| format!("@{}", decorator))
725 .collect();
726 signature_lines.push(trimmed.to_string());
727 let mut node = make_node(
728 file_path,
729 Language::Python,
730 kind,
731 &name,
732 line_no,
733 indent as i64,
734 now,
735 Some(signature_lines.join("\n")),
736 );
737 node.is_async = cap.get(2).is_some();
738 node.is_static = pending_decorators
739 .iter()
740 .any(|(_, decorator, _)| decorator == "staticmethod");
741 if let Some((_, class_name, class_id)) = parent_class {
742 node.qualified_name = format!("{}.{}", class_name, name);
743 edges.push(Edge {
744 id: None,
745 source: class_id.clone(),
746 target: node.id.clone(),
747 kind: EdgeKind::Contains,
748 line: None,
749 col: None,
750 provenance: Some("python".into()),
751 });
752 } else {
753 add_contains(nodes, edges, &node);
754 }
755
756 for (_, decorator, decorator_line) in &pending_decorators {
757 let decorator_name = python_decorator_reference_name(decorator);
758 refs_push(
759 refs,
760 &node.id,
761 &decorator_name,
762 EdgeKind::Decorates,
763 file_path,
764 Language::Python,
765 *decorator_line,
766 0,
767 );
768 }
769 nodes.push(node);
770 pending_decorators.clear();
771 continue;
772 }
773
774 extract_python_imports(file_path, line, line_no, now, nodes, edges, refs);
775 pending_decorators.clear();
776 }
777
778 add_call_refs(
779 file_path,
780 source,
781 Language::Python,
782 nodes,
783 refs,
784 r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(",
785 );
786}
787
788fn python_route_decorator(decorator: &str) -> Option<(&'static str, &str)> {
789 let open = decorator.find('(')?;
790 let callee = decorator[..open].trim();
791 let args = &decorator[open + 1..];
792 let method = if callee.ends_with(".get") {
793 "GET"
794 } else if callee.ends_with(".post") {
795 "POST"
796 } else if callee.ends_with(".put") {
797 "PUT"
798 } else if callee.ends_with(".patch") {
799 "PATCH"
800 } else if callee.ends_with(".delete") {
801 "DELETE"
802 } else if callee.ends_with(".route") || callee.ends_with(".api_route") {
803 "ROUTE"
804 } else {
805 return None;
806 };
807 first_quoted_arg(args).map(|path| (method, path))
808}
809
/// Returns the decorator's callee name: everything before the first `(`
/// (or the whole text when there is none), with surrounding whitespace
/// trimmed.
fn python_decorator_reference_name(decorator: &str) -> String {
    let callee = match decorator.split_once('(') {
        Some((head, _)) => head,
        None => decorator,
    };
    callee.trim().to_string()
}
818
819fn extract_python_imports(
820 file_path: &str,
821 line: &str,
822 line_no: i64,
823 now: i64,
824 nodes: &mut Vec<Node>,
825 edges: &mut Vec<Edge>,
826 refs: &mut Vec<UnresolvedReference>,
827) {
828 let import_re = Regex::new(r"^\s*import\s+(.+)$").unwrap();
829 let from_re = Regex::new(r"^\s*from\s+([A-Za-z_\.][A-Za-z0-9_\.]*)\s+import\s+(.+)$").unwrap();
830 let Some(file_id) = nodes.first().map(|node| node.id.clone()) else {
831 return;
832 };
833
834 if let Some(cap) = import_re.captures(line) {
835 for spec in cap[1]
836 .split(',')
837 .map(str::trim)
838 .filter(|spec| !spec.is_empty())
839 {
840 let module = spec.split_whitespace().next().unwrap_or(spec);
841 add_python_import_node(
842 file_path,
843 module,
844 line.trim(),
845 line_no,
846 now,
847 nodes,
848 edges,
849 refs,
850 &file_id,
851 );
852 }
853 return;
854 }
855
856 if let Some(cap) = from_re.captures(line) {
857 let module = cap[1].trim();
858 add_python_import_node(
859 file_path,
860 module,
861 line.trim(),
862 line_no,
863 now,
864 nodes,
865 edges,
866 refs,
867 &file_id,
868 );
869 }
870}
871
872#[allow(clippy::too_many_arguments)]
873fn add_python_import_node(
874 file_path: &str,
875 module: &str,
876 signature: &str,
877 line_no: i64,
878 now: i64,
879 nodes: &mut Vec<Node>,
880 edges: &mut Vec<Edge>,
881 refs: &mut Vec<UnresolvedReference>,
882 file_id: &str,
883) {
884 let node = make_node(
885 file_path,
886 Language::Python,
887 NodeKind::Import,
888 module,
889 line_no,
890 0,
891 now,
892 Some(signature.to_string()),
893 );
894 edges.push(Edge {
895 id: None,
896 source: file_id.to_string(),
897 target: node.id.clone(),
898 kind: EdgeKind::Contains,
899 line: None,
900 col: None,
901 provenance: Some("python".into()),
902 });
903 refs_push(
904 refs,
905 file_id,
906 module,
907 EdgeKind::Imports,
908 file_path,
909 Language::Python,
910 line_no,
911 0,
912 );
913 nodes.push(node);
914}
915
/// Measures the leading indentation of `line`: each space counts as 1 and
/// each tab as 4. Counting stops at the first other character.
fn python_indent_width(line: &str) -> usize {
    let mut width = 0;
    for ch in line.chars() {
        match ch {
            ' ' => width += 1,
            '\t' => width += 4,
            _ => break,
        }
    }
    width
}
922
923fn extract_go(
924 file_path: &str,
925 source: &str,
926 now: i64,
927 nodes: &mut Vec<Node>,
928 edges: &mut Vec<Edge>,
929 refs: &mut Vec<UnresolvedReference>,
930) {
931 let package_re = Regex::new(r"(?m)^\s*package\s+([A-Za-z_][A-Za-z0-9_]*)").unwrap();
932 if let Some(cap) = package_re.captures(source) {
933 let package = cap.get(1).unwrap();
934 let node = make_node(
935 file_path,
936 Language::Go,
937 NodeKind::Module,
938 package.as_str(),
939 line_for(source, package.start()),
940 0,
941 now,
942 cap.get(0).map(|m| m.as_str().trim().to_string()),
943 );
944 add_contains(nodes, edges, &node);
945 nodes.push(node);
946 }
947
948 add_go_imports(file_path, source, now, nodes, edges, refs);
949 add_go_types(file_path, source, now, nodes, edges);
950 add_go_functions(file_path, source, now, nodes, edges);
951 add_go_call_refs(file_path, source, nodes, refs);
952}
953
954fn add_go_functions(
955 file_path: &str,
956 source: &str,
957 now: i64,
958 nodes: &mut Vec<Node>,
959 edges: &mut Vec<Edge>,
960) {
961 let method_re = Regex::new(
962 r"(?m)^\s*func\s+\(\s*(?:[A-Za-z_][A-Za-z0-9_]*\s+)?\*?\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)\s*([A-Za-z_][A-Za-z0-9_]*)\s*(\([^)]*\)\s*(?:\([^)]*\)|[A-Za-z_][A-Za-z0-9_\.\[\]]*)?)",
963 )
964 .unwrap();
965 for cap in method_re.captures_iter(source) {
966 let receiver = cap.get(1).unwrap().as_str();
967 let name = cap.get(2).unwrap().as_str();
968 let signature = cap.get(0).map(|m| m.as_str().trim().to_string());
969 let mut node = make_node(
970 file_path,
971 Language::Go,
972 NodeKind::Method,
973 name,
974 line_for(source, cap.get(2).unwrap().start()),
975 0,
976 now,
977 signature,
978 );
979 node.qualified_name = format!("{}.{}", receiver, name);
980 add_contains(nodes, edges, &node);
981 nodes.push(node);
982 }
983
984 let function_re = Regex::new(
985 r"(?m)^\s*func\s+([A-Za-z_][A-Za-z0-9_]*)\s*(\([^)]*\)\s*(?:\([^)]*\)|[A-Za-z_][A-Za-z0-9_\.\[\]]*)?)",
986 )
987 .unwrap();
988 for cap in function_re.captures_iter(source) {
989 let name = cap.get(1).unwrap().as_str();
990 let signature = cap.get(0).map(|m| m.as_str().trim().to_string());
991 let node = make_node(
992 file_path,
993 Language::Go,
994 NodeKind::Function,
995 name,
996 line_for(source, cap.get(1).unwrap().start()),
997 0,
998 now,
999 signature,
1000 );
1001 add_contains(nodes, edges, &node);
1002 nodes.push(node);
1003 }
1004}
1005
1006fn add_go_types(
1007 file_path: &str,
1008 source: &str,
1009 now: i64,
1010 nodes: &mut Vec<Node>,
1011 edges: &mut Vec<Edge>,
1012) {
1013 let type_re =
1014 Regex::new(r"(?m)^\s*type\s+([A-Za-z_][A-Za-z0-9_]*)\s+(struct|interface)\s*\{").unwrap();
1015 for cap in type_re.captures_iter(source) {
1016 let kind = match cap.get(2).unwrap().as_str() {
1017 "struct" => NodeKind::Struct,
1018 "interface" => NodeKind::Interface,
1019 _ => continue,
1020 };
1021 let name = cap.get(1).unwrap();
1022 let node = make_node(
1023 file_path,
1024 Language::Go,
1025 kind,
1026 name.as_str(),
1027 line_for(source, name.start()),
1028 0,
1029 now,
1030 cap.get(0).map(|m| m.as_str().trim().to_string()),
1031 );
1032 add_contains(nodes, edges, &node);
1033 nodes.push(node);
1034 }
1035}
1036
1037fn add_go_imports(
1038 file_path: &str,
1039 source: &str,
1040 now: i64,
1041 nodes: &mut Vec<Node>,
1042 edges: &mut Vec<Edge>,
1043 refs: &mut Vec<UnresolvedReference>,
1044) {
1045 let single_re =
1046 Regex::new(r#"(?m)^\s*import\s+(?:(\.|_|[A-Za-z_][A-Za-z0-9_]*)\s+)?"([^"]+)""#).unwrap();
1047 for cap in single_re.captures_iter(source) {
1048 let module = cap.get(2).unwrap();
1049 add_go_import_node(
1050 file_path,
1051 module.as_str(),
1052 cap.get(0).unwrap().as_str().trim(),
1053 line_for(source, module.start()),
1054 now,
1055 nodes,
1056 edges,
1057 refs,
1058 );
1059 }
1060
1061 let block_re = Regex::new(r#"(?ms)^\s*import\s*\((?P<body>.*?)\)"#).unwrap();
1062 let item_re = Regex::new(r#"(?m)^\s*(?:(\.|_|[A-Za-z_][A-Za-z0-9_]*)\s+)?"([^"]+)""#).unwrap();
1063 for block in block_re.captures_iter(source) {
1064 let Some(body) = block.name("body") else {
1065 continue;
1066 };
1067 for cap in item_re.captures_iter(body.as_str()) {
1068 let module = cap.get(2).unwrap();
1069 let absolute_module_start = body.start() + module.start();
1070 add_go_import_node(
1071 file_path,
1072 module.as_str(),
1073 cap.get(0).unwrap().as_str().trim(),
1074 line_for(source, absolute_module_start),
1075 now,
1076 nodes,
1077 edges,
1078 refs,
1079 );
1080 }
1081 }
1082}
1083
1084#[allow(clippy::too_many_arguments)]
1085fn add_go_import_node(
1086 file_path: &str,
1087 module: &str,
1088 signature: &str,
1089 line: i64,
1090 now: i64,
1091 nodes: &mut Vec<Node>,
1092 edges: &mut Vec<Edge>,
1093 refs: &mut Vec<UnresolvedReference>,
1094) {
1095 let Some(file_id) = nodes.first().map(|node| node.id.clone()) else {
1096 return;
1097 };
1098 let node = make_node(
1099 file_path,
1100 Language::Go,
1101 NodeKind::Import,
1102 module,
1103 line,
1104 0,
1105 now,
1106 Some(signature.to_string()),
1107 );
1108 add_contains(nodes, edges, &node);
1109 refs_push(
1110 refs,
1111 &file_id,
1112 module,
1113 EdgeKind::Imports,
1114 file_path,
1115 Language::Go,
1116 line,
1117 0,
1118 );
1119 nodes.push(node);
1120}
1121
1122fn add_go_call_refs(
1123 file_path: &str,
1124 source: &str,
1125 nodes: &[Node],
1126 refs: &mut Vec<UnresolvedReference>,
1127) {
1128 let call_re = Regex::new(r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(").unwrap();
1129 let keywords = [
1130 "append", "cap", "close", "complex", "copy", "delete", "func", "if", "imag", "len", "make",
1131 "new", "panic", "print", "println", "real", "recover", "return", "switch",
1132 ];
1133 for cap in call_re.captures_iter(source) {
1134 let name_match = cap.get(1).unwrap();
1135 let name = name_match.as_str();
1136 let line = line_for(source, name_match.start());
1137 let line_text = source
1138 .lines()
1139 .nth(line.saturating_sub(1) as usize)
1140 .unwrap_or_default()
1141 .trim_start();
1142 if keywords.contains(&name)
1143 || line_text.starts_with("func ")
1144 || line_text.starts_with("type ")
1145 {
1146 continue;
1147 }
1148 if let Some(caller) = nodes
1149 .iter()
1150 .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
1151 .rev()
1152 .find(|n| n.start_line <= line)
1153 {
1154 refs_push(
1155 refs,
1156 &caller.id,
1157 name,
1158 EdgeKind::Calls,
1159 file_path,
1160 Language::Go,
1161 line,
1162 0,
1163 );
1164 }
1165 }
1166}
1167
1168fn extract_java_kotlin(
1169 file_path: &str,
1170 source: &str,
1171 language: Language,
1172 now: i64,
1173 nodes: &mut Vec<Node>,
1174 edges: &mut Vec<Edge>,
1175 refs: &mut Vec<UnresolvedReference>,
1176) {
1177 add_java_kotlin_imports(file_path, source, language, now, nodes, edges, refs);
1178 add_java_kotlin_types_and_members(file_path, source, language, now, nodes, edges, refs);
1179 add_call_refs(
1180 file_path,
1181 source,
1182 language,
1183 nodes,
1184 refs,
1185 r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(",
1186 );
1187}
1188
1189fn add_java_kotlin_imports(
1190 file_path: &str,
1191 source: &str,
1192 language: Language,
1193 now: i64,
1194 nodes: &mut Vec<Node>,
1195 edges: &mut Vec<Edge>,
1196 refs: &mut Vec<UnresolvedReference>,
1197) {
1198 let re =
1199 Regex::new(r"(?m)^\s*import\s+(?:static\s+)?([A-Za-z_][A-Za-z0-9_\.\*]*)\s*;?").unwrap();
1200 for cap in re.captures_iter(source) {
1201 let module = cap.get(1).unwrap();
1202 let node = make_node(
1203 file_path,
1204 language,
1205 NodeKind::Import,
1206 module.as_str(),
1207 line_for(source, module.start()),
1208 0,
1209 now,
1210 cap.get(0).map(|m| m.as_str().trim().to_string()),
1211 );
1212 add_contains(nodes, edges, &node);
1213 refs_push(
1214 refs,
1215 &nodes[0].id,
1216 module.as_str(),
1217 EdgeKind::Imports,
1218 file_path,
1219 language,
1220 node.start_line,
1221 0,
1222 );
1223 nodes.push(node);
1224 }
1225}
1226
1227fn add_java_kotlin_types_and_members(
1228 file_path: &str,
1229 source: &str,
1230 language: Language,
1231 now: i64,
1232 nodes: &mut Vec<Node>,
1233 edges: &mut Vec<Edge>,
1234 refs: &mut Vec<UnresolvedReference>,
1235) {
1236 let type_re = Regex::new(
1237 r"^\s*(?:(public|private|protected|internal)\s+)?(?:(?:data|abstract|open|final|sealed|enum)\s+)*(class|interface|enum)\s+([A-Za-z_][A-Za-z0-9_]*)([^{]*)\{?",
1238 )
1239 .unwrap();
1240 let java_method_re = Regex::new(
1241 r"^\s*(?:(public|private|protected)\s+)?((?:static|final|abstract|synchronized)\s+)*(?:[A-Za-z_][A-Za-z0-9_<>,\.\?\[\]\s]*\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\([^;{}]*\)\s*(?:throws\s+[A-Za-z0-9_,\.\s]+)?\{?",
1242 )
1243 .unwrap();
1244 let kotlin_fun_re = Regex::new(
1245 r"^\s*(?:(public|private|protected|internal)\s+)?((?:suspend|inline|open|override|private|public|protected|internal)\s+)*fun\s+(?:(?P<receiver>[A-Za-z_][A-Za-z0-9_\.]*)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)\s*\([^)]*\)\s*(?::\s*[A-Za-z_][A-Za-z0-9_<>,\.\?\s]*)?",
1246 )
1247 .unwrap();
1248 let annotation_re = Regex::new(r"^\s*@([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
1249 let mut type_stack: Vec<(usize, String, String)> = Vec::new();
1250 let mut pending_annotations: Vec<(String, i64)> = Vec::new();
1251
1252 for (idx, line) in source.lines().enumerate() {
1253 let line_no = idx as i64 + 1;
1254 let trimmed = line.trim();
1255 if trimmed.is_empty() || trimmed.starts_with("//") {
1256 continue;
1257 }
1258 let indent = python_indent_width(line);
1259 while type_stack
1260 .last()
1261 .is_some_and(|(type_indent, _, _)| indent <= *type_indent)
1262 {
1263 type_stack.pop();
1264 }
1265
1266 if let Some(cap) = annotation_re.captures(line) {
1267 pending_annotations.push((cap[1].to_string(), line_no));
1268 continue;
1269 }
1270
1271 if let Some(cap) = type_re.captures(line) {
1272 let keyword = cap.get(2).unwrap().as_str();
1273 let kind = match keyword {
1274 "interface" => NodeKind::Interface,
1275 "enum" => NodeKind::Enum,
1276 _ => NodeKind::Class,
1277 };
1278 let name = cap.get(3).unwrap();
1279 let mut node = make_node(
1280 file_path,
1281 language,
1282 kind,
1283 name.as_str(),
1284 line_no,
1285 indent as i64,
1286 now,
1287 Some(java_kotlin_signature(&pending_annotations, trimmed)),
1288 );
1289 node.visibility = cap.get(1).map(|m| m.as_str().to_string());
1290 node.is_exported = node
1291 .visibility
1292 .as_deref()
1293 .map(|visibility| visibility == "public")
1294 .unwrap_or(language == Language::Kotlin);
1295 add_contains(nodes, edges, &node);
1296 add_java_kotlin_metadata_refs(
1297 &node.id,
1298 cap.get(4).map(|m| m.as_str()).unwrap_or_default(),
1299 &pending_annotations,
1300 file_path,
1301 language,
1302 line_no,
1303 refs,
1304 );
1305 if !trimmed.contains('}') {
1306 type_stack.push((indent, name.as_str().to_string(), node.id.clone()));
1307 }
1308 nodes.push(node);
1309 pending_annotations.clear();
1310 continue;
1311 }
1312
1313 let member = match language {
1314 Language::Kotlin => kotlin_fun_re.captures(line).map(|cap| {
1315 (
1316 cap.name("name").unwrap().as_str().to_string(),
1317 cap.name("receiver").map(|m| m.as_str().to_string()),
1318 cap.get(1).map(|m| m.as_str().to_string()),
1319 cap.get(2)
1320 .map(|m| m.as_str().contains("suspend"))
1321 .unwrap_or(false),
1322 cap.get(0).unwrap().as_str().trim().to_string(),
1323 )
1324 }),
1325 _ => java_method_re.captures(line).and_then(|cap| {
1326 let name = cap.get(3).unwrap().as_str();
1327 let skip = matches!(
1328 name,
1329 "if" | "for" | "while" | "switch" | "catch" | "return" | "new"
1330 );
1331 (!skip).then(|| {
1332 (
1333 name.to_string(),
1334 None,
1335 cap.get(1).map(|m| m.as_str().to_string()),
1336 false,
1337 cap.get(0).unwrap().as_str().trim().to_string(),
1338 )
1339 })
1340 }),
1341 };
1342 if let Some((name, receiver, visibility, is_async, signature)) = member {
1343 let kind = if type_stack.is_empty() && language == Language::Kotlin {
1344 NodeKind::Function
1345 } else {
1346 NodeKind::Method
1347 };
1348 let mut node = make_node(
1349 file_path,
1350 language,
1351 kind,
1352 &name,
1353 line_no,
1354 indent as i64,
1355 now,
1356 Some(java_kotlin_signature(&pending_annotations, &signature)),
1357 );
1358 node.visibility = visibility;
1359 node.is_exported = node
1360 .visibility
1361 .as_deref()
1362 .map(|visibility| visibility == "public")
1363 .unwrap_or(language == Language::Kotlin);
1364 node.is_async = is_async;
1365 node.is_static = signature.contains(" static ");
1366 if let Some(receiver) =
1367 receiver.or_else(|| type_stack.last().map(|(_, name, _)| name.clone()))
1368 {
1369 node.qualified_name = format!("{}.{}", receiver, name);
1370 }
1371 if let Some((_, _, parent_id)) = type_stack.last() {
1372 edges.push(Edge {
1373 id: None,
1374 source: parent_id.clone(),
1375 target: node.id.clone(),
1376 kind: EdgeKind::Contains,
1377 line: None,
1378 col: None,
1379 provenance: Some(language.as_str().into()),
1380 });
1381 } else {
1382 add_contains(nodes, edges, &node);
1383 }
1384 for (annotation, annotation_line) in &pending_annotations {
1385 refs_push(
1386 refs,
1387 &node.id,
1388 annotation,
1389 EdgeKind::Decorates,
1390 file_path,
1391 language,
1392 *annotation_line,
1393 0,
1394 );
1395 }
1396 nodes.push(node);
1397 pending_annotations.clear();
1398 continue;
1399 }
1400
1401 pending_annotations.clear();
1402 }
1403}
1404
/// Renders a declaration together with the annotations that precede it.
///
/// Every annotation is written as `@name` on its own line, in the given order, followed by
/// `declaration`. With no annotations the declaration is returned unchanged.
fn java_kotlin_signature(annotations: &[(String, i64)], declaration: &str) -> String {
    let mut rendered = String::new();
    for (name, _) in annotations {
        rendered.push('@');
        rendered.push_str(name);
        rendered.push('\n');
    }
    rendered.push_str(declaration);
    rendered
}
1417
1418#[allow(clippy::too_many_arguments)]
1419fn add_java_kotlin_metadata_refs(
1420 node_id: &str,
1421 tail: &str,
1422 annotations: &[(String, i64)],
1423 file_path: &str,
1424 language: Language,
1425 line: i64,
1426 refs: &mut Vec<UnresolvedReference>,
1427) {
1428 let extends_re = Regex::new(r"\bextends\s+([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
1429 let implements_re = Regex::new(r"\bimplements\s+([A-Za-z_][A-Za-z0-9_\.,\s]*)").unwrap();
1430 let kotlin_super_re = Regex::new(r":\s*([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
1431 if let Some(cap) = extends_re
1432 .captures(tail)
1433 .or_else(|| kotlin_super_re.captures(tail))
1434 {
1435 refs_push(
1436 refs,
1437 node_id,
1438 cap.get(1).unwrap().as_str(),
1439 EdgeKind::Extends,
1440 file_path,
1441 language,
1442 line,
1443 0,
1444 );
1445 }
1446 if let Some(cap) = implements_re.captures(tail) {
1447 for name in cap[1]
1448 .split(',')
1449 .map(str::trim)
1450 .filter(|name| !name.is_empty())
1451 {
1452 refs_push(
1453 refs,
1454 node_id,
1455 name,
1456 EdgeKind::Implements,
1457 file_path,
1458 language,
1459 line,
1460 0,
1461 );
1462 }
1463 }
1464 for (annotation, annotation_line) in annotations {
1465 refs_push(
1466 refs,
1467 node_id,
1468 annotation,
1469 EdgeKind::Decorates,
1470 file_path,
1471 language,
1472 *annotation_line,
1473 0,
1474 );
1475 }
1476}
1477
1478fn extract_csharp(
1479 file_path: &str,
1480 source: &str,
1481 now: i64,
1482 nodes: &mut Vec<Node>,
1483 edges: &mut Vec<Edge>,
1484 refs: &mut Vec<UnresolvedReference>,
1485) {
1486 add_csharp_usings(file_path, source, now, nodes, edges, refs);
1487 add_csharp_types_and_members(file_path, source, now, nodes, edges, refs);
1488 add_csharp_call_refs(file_path, source, nodes, refs);
1489}
1490
1491fn add_csharp_usings(
1492 file_path: &str,
1493 source: &str,
1494 now: i64,
1495 nodes: &mut Vec<Node>,
1496 edges: &mut Vec<Edge>,
1497 refs: &mut Vec<UnresolvedReference>,
1498) {
1499 let re = Regex::new(
1500 r"(?m)^\s*using\s+(?:static\s+)?(?:(?:[A-Za-z_][A-Za-z0-9_]*)\s*=\s*)?([A-Za-z_][A-Za-z0-9_\.]*)\s*;",
1501 )
1502 .unwrap();
1503 for cap in re.captures_iter(source) {
1504 let module = cap.get(1).unwrap();
1505 let node = make_node(
1506 file_path,
1507 Language::CSharp,
1508 NodeKind::Import,
1509 module.as_str(),
1510 line_for(source, module.start()),
1511 0,
1512 now,
1513 cap.get(0).map(|m| m.as_str().trim().to_string()),
1514 );
1515 add_contains(nodes, edges, &node);
1516 refs_push(
1517 refs,
1518 &nodes[0].id,
1519 module.as_str(),
1520 EdgeKind::Imports,
1521 file_path,
1522 Language::CSharp,
1523 node.start_line,
1524 0,
1525 );
1526 nodes.push(node);
1527 }
1528}
1529
1530fn add_csharp_types_and_members(
1531 file_path: &str,
1532 source: &str,
1533 now: i64,
1534 nodes: &mut Vec<Node>,
1535 edges: &mut Vec<Edge>,
1536 refs: &mut Vec<UnresolvedReference>,
1537) {
1538 let type_re = Regex::new(
1539 r"^\s*(?:(public|private|protected|internal)\s+)?(?:(?:abstract|sealed|static|partial)\s+)*(class|interface|struct|enum)\s+([A-Za-z_][A-Za-z0-9_]*)([^{]*)\{?",
1540 )
1541 .unwrap();
1542 let method_re = Regex::new(
1543 r"^\s*(?:(public|private|protected|internal)\s+)?((?:static|async|virtual|override|abstract|sealed|partial)\s+)*(?:[A-Za-z_][A-Za-z0-9_<>,\.\?\[\]\s]*\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\([^;{}]*\)\s*(?:where\s+[^{]+)?\{?",
1544 )
1545 .unwrap();
1546 let property_re = Regex::new(
1547 r"^\s*(?:(public|private|protected|internal)\s+)?((?:static|virtual|override|abstract|sealed)\s+)*(?:[A-Za-z_][A-Za-z0-9_<>,\.\?\[\]\s]*\s+)([A-Za-z_][A-Za-z0-9_]*)\s*\{",
1548 )
1549 .unwrap();
1550 let attribute_re = Regex::new(r"^\s*\[\s*([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
1551 let mut type_stack: Vec<(usize, String, String)> = Vec::new();
1552 let mut pending_attributes: Vec<(String, i64)> = Vec::new();
1553
1554 for (idx, line) in source.lines().enumerate() {
1555 let line_no = idx as i64 + 1;
1556 let trimmed = line.trim();
1557 if trimmed.is_empty() || trimmed.starts_with("//") {
1558 continue;
1559 }
1560 let indent = python_indent_width(line);
1561 while type_stack
1562 .last()
1563 .is_some_and(|(type_indent, _, _)| indent <= *type_indent)
1564 {
1565 type_stack.pop();
1566 }
1567
1568 if let Some(cap) = attribute_re.captures(line) {
1569 pending_attributes.push((cap[1].to_string(), line_no));
1570 continue;
1571 }
1572
1573 if let Some(cap) = type_re.captures(line) {
1574 let keyword = cap.get(2).unwrap().as_str();
1575 let kind = match keyword {
1576 "interface" => NodeKind::Interface,
1577 "struct" => NodeKind::Struct,
1578 "enum" => NodeKind::Enum,
1579 _ => NodeKind::Class,
1580 };
1581 let name = cap.get(3).unwrap();
1582 let mut node = make_node(
1583 file_path,
1584 Language::CSharp,
1585 kind,
1586 name.as_str(),
1587 line_no,
1588 indent as i64,
1589 now,
1590 Some(csharp_signature(&pending_attributes, trimmed)),
1591 );
1592 node.visibility = cap
1593 .get(1)
1594 .map(|m| m.as_str().to_string())
1595 .or_else(|| Some("private".to_string()));
1596 node.is_exported = node.visibility.as_deref() == Some("public");
1597 add_contains(nodes, edges, &node);
1598 add_csharp_metadata_refs(
1599 &node.id,
1600 cap.get(4).map(|m| m.as_str()).unwrap_or_default(),
1601 &pending_attributes,
1602 file_path,
1603 line_no,
1604 refs,
1605 );
1606 if !trimmed.contains('}') {
1607 type_stack.push((indent, name.as_str().to_string(), node.id.clone()));
1608 }
1609 nodes.push(node);
1610 pending_attributes.clear();
1611 continue;
1612 }
1613
1614 let member = method_re
1615 .captures(line)
1616 .and_then(|cap| {
1617 let name = cap.get(3).unwrap().as_str();
1618 let skip = matches!(
1619 name,
1620 "if" | "for" | "foreach" | "while" | "switch" | "catch" | "return" | "new"
1621 );
1622 (!skip).then(|| {
1623 (
1624 NodeKind::Method,
1625 name.to_string(),
1626 cap.get(1).map(|m| m.as_str().to_string()),
1627 cap.get(2)
1628 .map(|m| m.as_str().to_string())
1629 .unwrap_or_default(),
1630 cap.get(0).unwrap().as_str().trim().to_string(),
1631 )
1632 })
1633 })
1634 .or_else(|| {
1635 property_re.captures(line).map(|cap| {
1636 (
1637 NodeKind::Property,
1638 cap.get(3).unwrap().as_str().to_string(),
1639 cap.get(1).map(|m| m.as_str().to_string()),
1640 cap.get(2)
1641 .map(|m| m.as_str().to_string())
1642 .unwrap_or_default(),
1643 cap.get(0).unwrap().as_str().trim().to_string(),
1644 )
1645 })
1646 });
1647
1648 if let Some((kind, name, visibility, modifiers, signature)) = member {
1649 let mut node = make_node(
1650 file_path,
1651 Language::CSharp,
1652 kind,
1653 &name,
1654 line_no,
1655 indent as i64,
1656 now,
1657 Some(csharp_signature(&pending_attributes, &signature)),
1658 );
1659 node.visibility = visibility.or_else(|| Some("private".to_string()));
1660 node.is_exported = node.visibility.as_deref() == Some("public");
1661 node.is_static = modifiers.contains("static") || signature.contains(" static ");
1662 node.is_async = modifiers.contains("async") || signature.contains(" async ");
1663 if let Some((_, type_name, parent_id)) = type_stack.last() {
1664 node.qualified_name = format!("{}.{}", type_name, name);
1665 edges.push(Edge {
1666 id: None,
1667 source: parent_id.clone(),
1668 target: node.id.clone(),
1669 kind: EdgeKind::Contains,
1670 line: None,
1671 col: None,
1672 provenance: Some("csharp".into()),
1673 });
1674 } else {
1675 add_contains(nodes, edges, &node);
1676 }
1677 for (attribute, attribute_line) in &pending_attributes {
1678 refs_push(
1679 refs,
1680 &node.id,
1681 attribute,
1682 EdgeKind::Decorates,
1683 file_path,
1684 Language::CSharp,
1685 *attribute_line,
1686 0,
1687 );
1688 }
1689 nodes.push(node);
1690 pending_attributes.clear();
1691 continue;
1692 }
1693
1694 pending_attributes.clear();
1695 }
1696}
1697
/// Renders a declaration together with the attributes that precede it.
///
/// Every attribute is written as `[name]` on its own line, in the given order, followed by
/// `declaration`. With no attributes the declaration is returned unchanged.
fn csharp_signature(attributes: &[(String, i64)], declaration: &str) -> String {
    attributes
        .iter()
        .map(|(attribute, _)| format!("[{attribute}]"))
        .chain(std::iter::once(declaration.to_string()))
        .collect::<Vec<_>>()
        .join("\n")
}
1710
1711fn add_csharp_metadata_refs(
1712 node_id: &str,
1713 tail: &str,
1714 attributes: &[(String, i64)],
1715 file_path: &str,
1716 line: i64,
1717 refs: &mut Vec<UnresolvedReference>,
1718) {
1719 let base_tail = tail.trim().strip_prefix(':').unwrap_or("").trim();
1720 let mut bases = base_tail
1721 .split(',')
1722 .map(str::trim)
1723 .filter(|name| !name.is_empty())
1724 .map(|name| name.split_whitespace().next().unwrap_or(name));
1725 if let Some(base) = bases.next() {
1726 refs_push(
1727 refs,
1728 node_id,
1729 base,
1730 EdgeKind::Extends,
1731 file_path,
1732 Language::CSharp,
1733 line,
1734 0,
1735 );
1736 }
1737 for name in bases {
1738 refs_push(
1739 refs,
1740 node_id,
1741 name,
1742 EdgeKind::Implements,
1743 file_path,
1744 Language::CSharp,
1745 line,
1746 0,
1747 );
1748 }
1749 for (attribute, attribute_line) in attributes {
1750 refs_push(
1751 refs,
1752 node_id,
1753 attribute,
1754 EdgeKind::Decorates,
1755 file_path,
1756 Language::CSharp,
1757 *attribute_line,
1758 0,
1759 );
1760 }
1761}
1762
1763fn add_csharp_call_refs(
1764 file_path: &str,
1765 source: &str,
1766 nodes: &[Node],
1767 refs: &mut Vec<UnresolvedReference>,
1768) {
1769 let re = Regex::new(r"([A-Za-z_][A-Za-z0-9_\.]*)\s*(?:<[^;\n()]+>)?\s*\(").unwrap();
1770 let keywords = [
1771 "if", "for", "foreach", "while", "switch", "catch", "return", "new", "typeof", "nameof",
1772 "using",
1773 ];
1774 for cap in re.captures_iter(source) {
1775 let name_match = cap.get(1).unwrap();
1776 let name = name_match.as_str();
1777 let line = line_for(source, name_match.start());
1778 let line_text = source
1779 .lines()
1780 .nth(line.saturating_sub(1) as usize)
1781 .unwrap_or_default()
1782 .trim_start();
1783 if keywords.contains(&name)
1784 || line_text.contains(&format!("{name}("))
1785 && matches!(
1786 line_text.split_whitespace().next(),
1787 Some("public" | "private" | "protected" | "internal" | "static" | "async")
1788 )
1789 {
1790 continue;
1791 }
1792 if let Some(caller) = nodes
1793 .iter()
1794 .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
1795 .rev()
1796 .find(|n| n.start_line <= line)
1797 {
1798 refs_push(
1799 refs,
1800 &caller.id,
1801 name,
1802 EdgeKind::Calls,
1803 file_path,
1804 Language::CSharp,
1805 line,
1806 0,
1807 );
1808 }
1809 }
1810}
1811
1812fn extract_php_ruby(
1813 file_path: &str,
1814 source: &str,
1815 language: Language,
1816 now: i64,
1817 nodes: &mut Vec<Node>,
1818 edges: &mut Vec<Edge>,
1819 refs: &mut Vec<UnresolvedReference>,
1820) {
1821 match language {
1822 Language::Php => extract_php(file_path, source, now, nodes, edges, refs),
1823 Language::Ruby => extract_ruby(file_path, source, now, nodes, edges, refs),
1824 _ => {}
1825 }
1826}
1827
1828fn extract_php(
1829 file_path: &str,
1830 source: &str,
1831 now: i64,
1832 nodes: &mut Vec<Node>,
1833 edges: &mut Vec<Edge>,
1834 refs: &mut Vec<UnresolvedReference>,
1835) {
1836 add_php_uses(file_path, source, now, nodes, edges, refs);
1837 add_php_symbols(file_path, source, now, nodes, edges, refs);
1838 add_php_call_refs(file_path, source, nodes, refs);
1839}
1840
1841fn add_php_uses(
1842 file_path: &str,
1843 source: &str,
1844 now: i64,
1845 nodes: &mut Vec<Node>,
1846 edges: &mut Vec<Edge>,
1847 refs: &mut Vec<UnresolvedReference>,
1848) {
1849 let single_re =
1850 Regex::new(r"(?m)^\s*use\s+((?:function\s+|const\s+)?[A-Za-z_\\][A-Za-z0-9_\\]*)(?:\s+as\s+[A-Za-z_][A-Za-z0-9_]*)?\s*;")
1851 .unwrap();
1852 for cap in single_re.captures_iter(source) {
1853 let module = cap.get(1).unwrap().as_str().trim();
1854 add_php_import_node(
1855 file_path,
1856 module,
1857 cap.get(0).unwrap().as_str().trim(),
1858 line_for(source, cap.get(1).unwrap().start()),
1859 now,
1860 nodes,
1861 edges,
1862 refs,
1863 );
1864 }
1865
1866 let group_re =
1867 Regex::new(r"(?ms)^\s*use\s+([A-Za-z_\\][A-Za-z0-9_\\]*)\\\s*\{(?P<body>.*?)\}\s*;")
1868 .unwrap();
1869 let item_re =
1870 Regex::new(r"(?m)([A-Za-z_][A-Za-z0-9_]*)(?:\s+as\s+[A-Za-z_][A-Za-z0-9_]*)?").unwrap();
1871 for cap in group_re.captures_iter(source) {
1872 let prefix = cap.get(1).unwrap().as_str();
1873 let Some(body) = cap.name("body") else {
1874 continue;
1875 };
1876 for item in item_re.captures_iter(body.as_str()) {
1877 let leaf = item.get(1).unwrap();
1878 let module = format!("{prefix}\\{}", leaf.as_str());
1879 add_php_import_node(
1880 file_path,
1881 &module,
1882 item.get(0).unwrap().as_str().trim(),
1883 line_for(source, body.start() + leaf.start()),
1884 now,
1885 nodes,
1886 edges,
1887 refs,
1888 );
1889 }
1890 }
1891}
1892
1893#[allow(clippy::too_many_arguments)]
1894fn add_php_import_node(
1895 file_path: &str,
1896 module: &str,
1897 signature: &str,
1898 line: i64,
1899 now: i64,
1900 nodes: &mut Vec<Node>,
1901 edges: &mut Vec<Edge>,
1902 refs: &mut Vec<UnresolvedReference>,
1903) {
1904 let Some(file_id) = nodes.first().map(|node| node.id.clone()) else {
1905 return;
1906 };
1907 let node = make_node(
1908 file_path,
1909 Language::Php,
1910 NodeKind::Import,
1911 module,
1912 line,
1913 0,
1914 now,
1915 Some(signature.to_string()),
1916 );
1917 add_contains(nodes, edges, &node);
1918 refs_push(
1919 refs,
1920 &file_id,
1921 module,
1922 EdgeKind::Imports,
1923 file_path,
1924 Language::Php,
1925 line,
1926 0,
1927 );
1928 nodes.push(node);
1929}
1930
1931fn add_php_symbols(
1932 file_path: &str,
1933 source: &str,
1934 now: i64,
1935 nodes: &mut Vec<Node>,
1936 edges: &mut Vec<Edge>,
1937 refs: &mut Vec<UnresolvedReference>,
1938) {
1939 let type_re = Regex::new(
1940 r"^\s*(?:(abstract|final)\s+)?(class|interface|trait|enum)\s+([A-Za-z_][A-Za-z0-9_]*)([^{]*)\{?",
1941 )
1942 .unwrap();
1943 let function_re = Regex::new(
1944 r"^\s*(?:(public|private|protected)\s+)?((?:static|abstract|final)\s+)*function\s+([A-Za-z_][A-Za-z0-9_]*)\s*\([^)]*\)",
1945 )
1946 .unwrap();
1947 let mut type_stack: Vec<(usize, String, String)> = Vec::new();
1948
1949 for (idx, line) in source.lines().enumerate() {
1950 let line_no = idx as i64 + 1;
1951 let trimmed = line.trim();
1952 if trimmed.is_empty() || trimmed.starts_with("//") || trimmed.starts_with('#') {
1953 continue;
1954 }
1955 let indent = python_indent_width(line);
1956 while type_stack
1957 .last()
1958 .is_some_and(|(type_indent, _, _)| indent <= *type_indent)
1959 {
1960 type_stack.pop();
1961 }
1962
1963 if let Some(cap) = type_re.captures(line) {
1964 let kind = match cap.get(2).unwrap().as_str() {
1965 "interface" => NodeKind::Interface,
1966 "trait" => NodeKind::Trait,
1967 "enum" => NodeKind::Enum,
1968 _ => NodeKind::Class,
1969 };
1970 let name = cap.get(3).unwrap();
1971 let mut node = make_node(
1972 file_path,
1973 Language::Php,
1974 kind,
1975 name.as_str(),
1976 line_no,
1977 indent as i64,
1978 now,
1979 Some(trimmed.to_string()),
1980 );
1981 node.visibility = Some("public".to_string());
1982 node.is_exported = true;
1983 add_contains(nodes, edges, &node);
1984 add_php_inheritance_refs(
1985 &node.id,
1986 cap.get(4).map(|m| m.as_str()).unwrap_or_default(),
1987 file_path,
1988 line_no,
1989 refs,
1990 );
1991 if !trimmed.contains('}') {
1992 type_stack.push((indent, name.as_str().to_string(), node.id.clone()));
1993 }
1994 nodes.push(node);
1995 continue;
1996 }
1997
1998 if let Some(cap) = function_re.captures(line) {
1999 let name = cap.get(3).unwrap().as_str();
2000 let kind = if type_stack.is_empty() {
2001 NodeKind::Function
2002 } else {
2003 NodeKind::Method
2004 };
2005 let mut node = make_node(
2006 file_path,
2007 Language::Php,
2008 kind,
2009 name,
2010 line_no,
2011 indent as i64,
2012 now,
2013 Some(trimmed.to_string()),
2014 );
2015 node.visibility = cap
2016 .get(1)
2017 .map(|m| m.as_str().to_string())
2018 .or_else(|| Some("public".to_string()));
2019 node.is_exported = node.visibility.as_deref() == Some("public");
2020 node.is_static = cap
2021 .get(2)
2022 .map(|m| m.as_str().contains("static"))
2023 .unwrap_or(false)
2024 || trimmed.contains(" static ");
2025 if let Some((_, type_name, parent_id)) = type_stack.last() {
2026 node.qualified_name = format!("{}::{}", type_name, name);
2027 edges.push(Edge {
2028 id: None,
2029 source: parent_id.clone(),
2030 target: node.id.clone(),
2031 kind: EdgeKind::Contains,
2032 line: None,
2033 col: None,
2034 provenance: Some("php".into()),
2035 });
2036 } else {
2037 add_contains(nodes, edges, &node);
2038 }
2039 nodes.push(node);
2040 continue;
2041 }
2042
2043 if trimmed.starts_with("use ") && !type_stack.is_empty() {
2044 let Some((_, _, parent_id)) = type_stack.last() else {
2045 continue;
2046 };
2047 for name in trimmed
2048 .trim_start_matches("use ")
2049 .trim_end_matches(';')
2050 .split(',')
2051 .map(str::trim)
2052 .filter(|name| !name.is_empty())
2053 {
2054 refs_push(
2055 refs,
2056 parent_id,
2057 name,
2058 EdgeKind::Implements,
2059 file_path,
2060 Language::Php,
2061 line_no,
2062 0,
2063 );
2064 }
2065 }
2066 }
2067}
2068
2069fn add_php_inheritance_refs(
2070 node_id: &str,
2071 tail: &str,
2072 file_path: &str,
2073 line: i64,
2074 refs: &mut Vec<UnresolvedReference>,
2075) {
2076 let extends_re = Regex::new(r"\bextends\s+([A-Za-z_\\][A-Za-z0-9_\\]*)").unwrap();
2077 let implements_re = Regex::new(r"\bimplements\s+([A-Za-z_\\][A-Za-z0-9_\\,\s]*)").unwrap();
2078 if let Some(cap) = extends_re.captures(tail) {
2079 refs_push(
2080 refs,
2081 node_id,
2082 cap.get(1).unwrap().as_str(),
2083 EdgeKind::Extends,
2084 file_path,
2085 Language::Php,
2086 line,
2087 0,
2088 );
2089 }
2090 if let Some(cap) = implements_re.captures(tail) {
2091 for name in cap[1]
2092 .split(',')
2093 .map(str::trim)
2094 .filter(|name| !name.is_empty())
2095 {
2096 refs_push(
2097 refs,
2098 node_id,
2099 name,
2100 EdgeKind::Implements,
2101 file_path,
2102 Language::Php,
2103 line,
2104 0,
2105 );
2106 }
2107 }
2108}
2109
2110fn add_php_call_refs(
2111 file_path: &str,
2112 source: &str,
2113 nodes: &[Node],
2114 refs: &mut Vec<UnresolvedReference>,
2115) {
2116 let re = Regex::new(r"([A-Za-z_\\][A-Za-z0-9_\\]*(?:::[A-Za-z_][A-Za-z0-9_]*)?)\s*\(").unwrap();
2117 let keywords = [
2118 "if", "for", "foreach", "while", "switch", "catch", "return", "function",
2119 ];
2120 for cap in re.captures_iter(source) {
2121 let name_match = cap.get(1).unwrap();
2122 let name = name_match.as_str();
2123 let line = line_for(source, name_match.start());
2124 let line_text = source
2125 .lines()
2126 .nth(line.saturating_sub(1) as usize)
2127 .unwrap_or_default()
2128 .trim_start();
2129 if keywords.contains(&name) || line_text.contains(&format!("function {name}(")) {
2130 continue;
2131 }
2132 if let Some(caller) = nodes
2133 .iter()
2134 .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
2135 .rev()
2136 .find(|n| n.start_line <= line)
2137 {
2138 refs_push(
2139 refs,
2140 &caller.id,
2141 name,
2142 EdgeKind::Calls,
2143 file_path,
2144 Language::Php,
2145 line,
2146 0,
2147 );
2148 }
2149 }
2150}
2151
2152fn extract_ruby(
2153 file_path: &str,
2154 source: &str,
2155 now: i64,
2156 nodes: &mut Vec<Node>,
2157 edges: &mut Vec<Edge>,
2158 refs: &mut Vec<UnresolvedReference>,
2159) {
2160 let import_re = Regex::new(r#"^\s*(require|require_relative)\s+["']([^"']+)["']"#).unwrap();
2161 let module_re = Regex::new(r"^\s*module\s+([A-Z][A-Za-z0-9_:]*)").unwrap();
2162 let class_re =
2163 Regex::new(r"^\s*class\s+([A-Z][A-Za-z0-9_:]*)(?:\s*<\s*([A-Z][A-Za-z0-9_:]*))?").unwrap();
2164 let method_re = Regex::new(r"^\s*def\s+(?:(self)\.)?([A-Za-z_][A-Za-z0-9_!?=]*)").unwrap();
2165 let mut stack: Vec<(usize, NodeKind, String, String)> = Vec::new();
2166
2167 for (idx, line) in source.lines().enumerate() {
2168 let line_no = idx as i64 + 1;
2169 let trimmed = line.trim();
2170 if trimmed.is_empty() || trimmed.starts_with('#') {
2171 continue;
2172 }
2173 let indent = python_indent_width(line);
2174 if trimmed == "end" {
2175 stack.pop();
2176 continue;
2177 }
2178
2179 if let Some(cap) = import_re.captures(line) {
2180 let module = cap.get(2).unwrap();
2181 let node = make_node(
2182 file_path,
2183 Language::Ruby,
2184 NodeKind::Import,
2185 module.as_str(),
2186 line_no,
2187 indent as i64,
2188 now,
2189 Some(trimmed.to_string()),
2190 );
2191 add_contains(nodes, edges, &node);
2192 refs_push(
2193 refs,
2194 &nodes[0].id,
2195 module.as_str(),
2196 EdgeKind::Imports,
2197 file_path,
2198 Language::Ruby,
2199 line_no,
2200 0,
2201 );
2202 nodes.push(node);
2203 continue;
2204 }
2205
2206 if let Some(cap) = module_re.captures(line) {
2207 let name = cap.get(1).unwrap();
2208 let node = make_node(
2209 file_path,
2210 Language::Ruby,
2211 NodeKind::Module,
2212 name.as_str(),
2213 line_no,
2214 indent as i64,
2215 now,
2216 Some(trimmed.to_string()),
2217 );
2218 add_contains_from_ruby_stack(nodes, edges, &stack, &node);
2219 stack.push((
2220 indent,
2221 NodeKind::Module,
2222 name.as_str().to_string(),
2223 node.id.clone(),
2224 ));
2225 nodes.push(node);
2226 continue;
2227 }
2228
2229 if let Some(cap) = class_re.captures(line) {
2230 let name = cap.get(1).unwrap();
2231 let node = make_node(
2232 file_path,
2233 Language::Ruby,
2234 NodeKind::Class,
2235 name.as_str(),
2236 line_no,
2237 indent as i64,
2238 now,
2239 Some(trimmed.to_string()),
2240 );
2241 add_contains_from_ruby_stack(nodes, edges, &stack, &node);
2242 if let Some(parent) = cap.get(2) {
2243 refs_push(
2244 refs,
2245 &node.id,
2246 parent.as_str(),
2247 EdgeKind::Extends,
2248 file_path,
2249 Language::Ruby,
2250 line_no,
2251 0,
2252 );
2253 }
2254 stack.push((
2255 indent,
2256 NodeKind::Class,
2257 name.as_str().to_string(),
2258 node.id.clone(),
2259 ));
2260 nodes.push(node);
2261 continue;
2262 }
2263
2264 if let Some(cap) = method_re.captures(line) {
2265 let name = cap.get(2).unwrap().as_str();
2266 let mut node = make_node(
2267 file_path,
2268 Language::Ruby,
2269 NodeKind::Method,
2270 name,
2271 line_no,
2272 indent as i64,
2273 now,
2274 Some(trimmed.to_string()),
2275 );
2276 node.visibility = Some("public".to_string());
2277 node.is_exported = true;
2278 node.is_static = cap.get(1).is_some();
2279 if let Some((_, _, owner_name, _)) = stack
2280 .iter()
2281 .rev()
2282 .find(|(_, kind, _, _)| matches!(kind, NodeKind::Class | NodeKind::Module))
2283 {
2284 let sep = if node.is_static { "." } else { "#" };
2285 node.qualified_name = format!("{owner_name}{sep}{name}");
2286 }
2287 add_contains_from_ruby_stack(nodes, edges, &stack, &node);
2288 nodes.push(node);
2289 }
2290 }
2291
2292 add_call_refs(
2293 file_path,
2294 source,
2295 Language::Ruby,
2296 nodes,
2297 refs,
2298 r"([A-Za-z_][A-Za-z0-9_!?=]*)\s*\(",
2299 );
2300}
2301
2302fn add_contains_from_ruby_stack(
2303 nodes: &[Node],
2304 edges: &mut Vec<Edge>,
2305 stack: &[(usize, NodeKind, String, String)],
2306 node: &Node,
2307) {
2308 if let Some((_, _, _, source)) = stack.last() {
2309 edges.push(Edge {
2310 id: None,
2311 source: source.clone(),
2312 target: node.id.clone(),
2313 kind: EdgeKind::Contains,
2314 line: None,
2315 col: None,
2316 provenance: Some("ruby".into()),
2317 });
2318 } else {
2319 add_contains(nodes, edges, node);
2320 }
2321}
2322
2323fn extract_swift(
2324 file_path: &str,
2325 source: &str,
2326 now: i64,
2327 nodes: &mut Vec<Node>,
2328 edges: &mut Vec<Edge>,
2329 refs: &mut Vec<UnresolvedReference>,
2330) {
2331 add_swift_imports(file_path, source, now, nodes, edges, refs);
2332 add_swift_symbols(file_path, source, now, nodes, edges, refs);
2333 add_call_refs(
2334 file_path,
2335 source,
2336 Language::Swift,
2337 nodes,
2338 refs,
2339 r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(",
2340 );
2341}
2342
2343fn add_swift_imports(
2344 file_path: &str,
2345 source: &str,
2346 now: i64,
2347 nodes: &mut Vec<Node>,
2348 edges: &mut Vec<Edge>,
2349 refs: &mut Vec<UnresolvedReference>,
2350) {
2351 let re = Regex::new(r"(?m)^\s*import\s+([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
2352 for cap in re.captures_iter(source) {
2353 let module = cap.get(1).unwrap();
2354 let node = make_node(
2355 file_path,
2356 Language::Swift,
2357 NodeKind::Import,
2358 module.as_str(),
2359 line_for(source, module.start()),
2360 0,
2361 now,
2362 cap.get(0).map(|m| m.as_str().trim().to_string()),
2363 );
2364 add_contains(nodes, edges, &node);
2365 refs_push(
2366 refs,
2367 &nodes[0].id,
2368 module.as_str(),
2369 EdgeKind::Imports,
2370 file_path,
2371 Language::Swift,
2372 node.start_line,
2373 0,
2374 );
2375 nodes.push(node);
2376 }
2377}
2378
2379fn add_swift_symbols(
2380 file_path: &str,
2381 source: &str,
2382 now: i64,
2383 nodes: &mut Vec<Node>,
2384 edges: &mut Vec<Edge>,
2385 refs: &mut Vec<UnresolvedReference>,
2386) {
2387 let type_re = Regex::new(
2388 r"^\s*(?:(public|private|internal|fileprivate|open)\s+)?(?:(?:final|open)\s+)*(class|struct|protocol|enum)\s+([A-Za-z_][A-Za-z0-9_]*)([^{]*)\{?",
2389 )
2390 .unwrap();
2391 let function_re = Regex::new(
2392 r"^\s*(?:(public|private|internal|fileprivate|open)\s+)?((?:static|class|mutating|async|final|override)\s+)*func\s+([A-Za-z_][A-Za-z0-9_]*)\s*\([^)]*\)\s*(?:async\s+)?(?:throws\s+)?(?:->\s*[A-Za-z_][A-Za-z0-9_<>,\.\[\]\?]*)?",
2393 )
2394 .unwrap();
2395 let typealias_re =
2396 Regex::new(r"^\s*(?:(public|private|internal|fileprivate|open)\s+)?typealias\s+([A-Za-z_][A-Za-z0-9_]*)\s*=")
2397 .unwrap();
2398 let mut type_stack: Vec<(usize, String, String)> = Vec::new();
2399
2400 for (idx, line) in source.lines().enumerate() {
2401 let line_no = idx as i64 + 1;
2402 let trimmed = line.trim();
2403 if trimmed.is_empty() || trimmed.starts_with("//") {
2404 continue;
2405 }
2406 let indent = python_indent_width(line);
2407 while type_stack
2408 .last()
2409 .is_some_and(|(type_indent, _, _)| indent <= *type_indent)
2410 {
2411 type_stack.pop();
2412 }
2413
2414 if let Some(cap) = type_re.captures(line) {
2415 let kind = match cap.get(2).unwrap().as_str() {
2416 "struct" => NodeKind::Struct,
2417 "protocol" => NodeKind::Protocol,
2418 "enum" => NodeKind::Enum,
2419 _ => NodeKind::Class,
2420 };
2421 let name = cap.get(3).unwrap();
2422 let mut node = make_node(
2423 file_path,
2424 Language::Swift,
2425 kind,
2426 name.as_str(),
2427 line_no,
2428 indent as i64,
2429 now,
2430 Some(trimmed.to_string()),
2431 );
2432 node.visibility = cap
2433 .get(1)
2434 .map(|m| swift_visibility(m.as_str()).to_string())
2435 .or_else(|| Some("internal".to_string()));
2436 node.is_exported = matches!(node.visibility.as_deref(), Some("public" | "open"));
2437 add_contains(nodes, edges, &node);
2438 add_swift_inheritance_refs(
2439 &node.id,
2440 cap.get(4).map(|m| m.as_str()).unwrap_or_default(),
2441 file_path,
2442 line_no,
2443 refs,
2444 );
2445 if !trimmed.contains('}') {
2446 type_stack.push((indent, name.as_str().to_string(), node.id.clone()));
2447 }
2448 nodes.push(node);
2449 continue;
2450 }
2451
2452 if let Some(cap) = function_re.captures(line) {
2453 let name = cap.get(3).unwrap().as_str();
2454 let kind = if type_stack.is_empty() {
2455 NodeKind::Function
2456 } else {
2457 NodeKind::Method
2458 };
2459 let mut node = make_node(
2460 file_path,
2461 Language::Swift,
2462 kind,
2463 name,
2464 line_no,
2465 indent as i64,
2466 now,
2467 Some(trimmed.to_string()),
2468 );
2469 node.visibility = cap
2470 .get(1)
2471 .map(|m| swift_visibility(m.as_str()).to_string())
2472 .or_else(|| Some("internal".to_string()));
2473 node.is_exported = matches!(node.visibility.as_deref(), Some("public" | "open"));
2474 let modifiers = cap.get(2).map(|m| m.as_str()).unwrap_or_default();
2475 node.is_static = modifiers.contains("static")
2476 || modifiers.contains("class")
2477 || trimmed.contains(" static ")
2478 || trimmed.contains(" class ");
2479 node.is_async = modifiers.contains("async") || trimmed.contains(" async ");
2480 if let Some((_, type_name, parent_id)) = type_stack.last() {
2481 node.qualified_name = format!("{}.{}", type_name, name);
2482 edges.push(Edge {
2483 id: None,
2484 source: parent_id.clone(),
2485 target: node.id.clone(),
2486 kind: EdgeKind::Contains,
2487 line: None,
2488 col: None,
2489 provenance: Some("swift".into()),
2490 });
2491 } else {
2492 add_contains(nodes, edges, &node);
2493 }
2494 nodes.push(node);
2495 continue;
2496 }
2497
2498 if let Some(cap) = typealias_re.captures(line) {
2499 let name = cap.get(2).unwrap();
2500 let mut node = make_node(
2501 file_path,
2502 Language::Swift,
2503 NodeKind::TypeAlias,
2504 name.as_str(),
2505 line_no,
2506 indent as i64,
2507 now,
2508 Some(trimmed.to_string()),
2509 );
2510 node.visibility = cap
2511 .get(1)
2512 .map(|m| swift_visibility(m.as_str()).to_string())
2513 .or_else(|| Some("internal".to_string()));
2514 node.is_exported = matches!(node.visibility.as_deref(), Some("public" | "open"));
2515 add_contains(nodes, edges, &node);
2516 nodes.push(node);
2517 }
2518 }
2519}
2520
/// Maps a Swift access-level keyword to the visibility label stored on nodes.
///
/// `fileprivate` is folded into `private`; any other input is returned as-is.
fn swift_visibility(visibility: &str) -> &str {
    if visibility == "fileprivate" {
        "private"
    } else {
        visibility
    }
}
2527
2528fn add_swift_inheritance_refs(
2529 node_id: &str,
2530 tail: &str,
2531 file_path: &str,
2532 line: i64,
2533 refs: &mut Vec<UnresolvedReference>,
2534) {
2535 let Some(base_tail) = tail.trim().strip_prefix(':') else {
2536 return;
2537 };
2538 let mut names = base_tail
2539 .split(',')
2540 .map(str::trim)
2541 .filter(|name| !name.is_empty())
2542 .map(|name| name.split_whitespace().next().unwrap_or(name));
2543 if let Some(first) = names.next() {
2544 refs_push(
2545 refs,
2546 node_id,
2547 first,
2548 EdgeKind::Extends,
2549 file_path,
2550 Language::Swift,
2551 line,
2552 0,
2553 );
2554 }
2555 for name in names {
2556 refs_push(
2557 refs,
2558 node_id,
2559 name,
2560 EdgeKind::Implements,
2561 file_path,
2562 Language::Swift,
2563 line,
2564 0,
2565 );
2566 }
2567}
2568
2569fn extract_dart_pascal_scala(
2570 file_path: &str,
2571 source: &str,
2572 language: Language,
2573 now: i64,
2574 nodes: &mut Vec<Node>,
2575 edges: &mut Vec<Edge>,
2576 refs: &mut Vec<UnresolvedReference>,
2577) {
2578 match language {
2579 Language::Dart => extract_dart(file_path, source, now, nodes, edges, refs),
2580 Language::Pascal => extract_pascal(file_path, source, now, nodes, edges, refs),
2581 Language::Scala => extract_scala(file_path, source, now, nodes, edges, refs),
2582 _ => {}
2583 }
2584}
2585
2586fn extract_dart(
2587 file_path: &str,
2588 source: &str,
2589 now: i64,
2590 nodes: &mut Vec<Node>,
2591 edges: &mut Vec<Edge>,
2592 refs: &mut Vec<UnresolvedReference>,
2593) {
2594 let import_re = Regex::new(
2595 r#"(?m)^\s*(?:import|export)\s+['"]([^'"]+)['"](?:\s+as\s+[A-Za-z_][A-Za-z0-9_]*)?\s*;"#,
2596 )
2597 .unwrap();
2598 for cap in import_re.captures_iter(source) {
2599 let module = cap.get(1).unwrap();
2600 let node = make_node(
2601 file_path,
2602 Language::Dart,
2603 NodeKind::Import,
2604 module.as_str(),
2605 line_for(source, module.start()),
2606 0,
2607 now,
2608 cap.get(0).map(|m| m.as_str().trim().to_string()),
2609 );
2610 add_contains(nodes, edges, &node);
2611 refs_push(
2612 refs,
2613 &nodes[0].id,
2614 module.as_str(),
2615 EdgeKind::Imports,
2616 file_path,
2617 Language::Dart,
2618 node.start_line,
2619 0,
2620 );
2621 nodes.push(node);
2622 }
2623
2624 let type_re =
2625 Regex::new(r"^\s*(class|mixin|extension|enum)\s+([A-Za-z_][A-Za-z0-9_]*)([^{]*)\{?")
2626 .unwrap();
2627 let function_re = Regex::new(
2628 r"^\s*(?:static\s+)?(?:[A-Za-z_][A-Za-z0-9_<>,\?\[\]]*\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\([^;{}]*\)\s*(?:async\s*)?\{?",
2629 )
2630 .unwrap();
2631 let typealias_re = Regex::new(r"^\s*typedef\s+([A-Za-z_][A-Za-z0-9_]*)\s*=").unwrap();
2632 let mut type_stack: Vec<(usize, String, String)> = Vec::new();
2633 for (idx, line) in source.lines().enumerate() {
2634 let line_no = idx as i64 + 1;
2635 let trimmed = line.trim();
2636 if trimmed.is_empty() || trimmed.starts_with("//") {
2637 continue;
2638 }
2639 let indent = python_indent_width(line);
2640 while type_stack
2641 .last()
2642 .is_some_and(|(type_indent, _, _)| indent <= *type_indent)
2643 {
2644 type_stack.pop();
2645 }
2646 if let Some(cap) = type_re.captures(line) {
2647 let kind = match cap.get(1).unwrap().as_str() {
2648 "enum" => NodeKind::Enum,
2649 "mixin" | "extension" => NodeKind::Trait,
2650 _ => NodeKind::Class,
2651 };
2652 let name = cap.get(2).unwrap();
2653 let mut node = make_node(
2654 file_path,
2655 Language::Dart,
2656 kind,
2657 name.as_str(),
2658 line_no,
2659 indent as i64,
2660 now,
2661 Some(trimmed.to_string()),
2662 );
2663 node.visibility = Some(dart_visibility(name.as_str()).to_string());
2664 node.is_exported = node.visibility.as_deref() == Some("public");
2665 add_contains(nodes, edges, &node);
2666 add_dart_inheritance_refs(
2667 &node.id,
2668 cap.get(3).map(|m| m.as_str()).unwrap_or_default(),
2669 file_path,
2670 line_no,
2671 refs,
2672 );
2673 if !trimmed.contains('}') {
2674 type_stack.push((indent, name.as_str().to_string(), node.id.clone()));
2675 }
2676 nodes.push(node);
2677 continue;
2678 }
2679 if let Some(cap) = typealias_re.captures(line) {
2680 let name = cap.get(1).unwrap();
2681 let mut node = make_node(
2682 file_path,
2683 Language::Dart,
2684 NodeKind::TypeAlias,
2685 name.as_str(),
2686 line_no,
2687 indent as i64,
2688 now,
2689 Some(trimmed.to_string()),
2690 );
2691 node.visibility = Some(dart_visibility(name.as_str()).to_string());
2692 node.is_exported = node.visibility.as_deref() == Some("public");
2693 add_contains(nodes, edges, &node);
2694 nodes.push(node);
2695 continue;
2696 }
2697 if let Some(cap) = function_re.captures(line) {
2698 let name = cap.get(1).unwrap().as_str();
2699 if matches!(name, "if" | "for" | "while" | "switch" | "return") {
2700 continue;
2701 }
2702 if !trimmed.contains('{') {
2703 continue;
2704 }
2705 let kind = if type_stack.is_empty() {
2706 NodeKind::Function
2707 } else {
2708 NodeKind::Method
2709 };
2710 let mut node = make_node(
2711 file_path,
2712 Language::Dart,
2713 kind,
2714 name,
2715 line_no,
2716 indent as i64,
2717 now,
2718 Some(trimmed.to_string()),
2719 );
2720 node.visibility = Some(dart_visibility(name).to_string());
2721 node.is_exported = node.visibility.as_deref() == Some("public");
2722 node.is_static = trimmed.starts_with("static ") || trimmed.contains(" static ");
2723 node.is_async = trimmed.contains(" async");
2724 if let Some((_, type_name, parent_id)) = type_stack.last() {
2725 node.qualified_name = format!("{}.{}", type_name, name);
2726 edges.push(Edge {
2727 id: None,
2728 source: parent_id.clone(),
2729 target: node.id.clone(),
2730 kind: EdgeKind::Contains,
2731 line: None,
2732 col: None,
2733 provenance: Some("dart".into()),
2734 });
2735 } else {
2736 add_contains(nodes, edges, &node);
2737 }
2738 nodes.push(node);
2739 }
2740 }
2741 add_call_refs(
2742 file_path,
2743 source,
2744 Language::Dart,
2745 nodes,
2746 refs,
2747 r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(",
2748 );
2749}
2750
/// Derives Dart visibility from an identifier: a leading underscore means
/// `"private"`, anything else (including the empty string) is `"public"`.
fn dart_visibility(name: &str) -> &str {
    match name.strip_prefix('_') {
        Some(_) => "private",
        None => "public",
    }
}
2758
2759fn add_dart_inheritance_refs(
2760 node_id: &str,
2761 tail: &str,
2762 file_path: &str,
2763 line: i64,
2764 refs: &mut Vec<UnresolvedReference>,
2765) {
2766 let extends_re = Regex::new(r"\bextends\s+([A-Za-z_][A-Za-z0-9_]*)").unwrap();
2767 let implements_re = Regex::new(r"\bimplements\s+([A-Za-z_][A-Za-z0-9_,\s]*)").unwrap();
2768 let with_re = Regex::new(r"\bwith\s+([A-Za-z_][A-Za-z0-9_,\s]*)").unwrap();
2769 let on_re = Regex::new(r"\bon\s+([A-Za-z_][A-Za-z0-9_,\s]*)").unwrap();
2770 if let Some(cap) = extends_re.captures(tail).or_else(|| on_re.captures(tail)) {
2771 refs_push(
2772 refs,
2773 node_id,
2774 cap.get(1).unwrap().as_str(),
2775 EdgeKind::Extends,
2776 file_path,
2777 Language::Dart,
2778 line,
2779 0,
2780 );
2781 }
2782 for re in [&implements_re, &with_re] {
2783 if let Some(cap) = re.captures(tail) {
2784 for name in cap[1]
2785 .split(',')
2786 .map(str::trim)
2787 .filter(|name| !name.is_empty())
2788 {
2789 refs_push(
2790 refs,
2791 node_id,
2792 name,
2793 EdgeKind::Implements,
2794 file_path,
2795 Language::Dart,
2796 line,
2797 0,
2798 );
2799 }
2800 }
2801 }
2802}
2803
2804fn extract_pascal(
2805 file_path: &str,
2806 source: &str,
2807 now: i64,
2808 nodes: &mut Vec<Node>,
2809 edges: &mut Vec<Edge>,
2810 refs: &mut Vec<UnresolvedReference>,
2811) {
2812 let unit_re = Regex::new(r"(?i)^\s*unit\s+([A-Za-z_][A-Za-z0-9_]*)\s*;").unwrap();
2813 let uses_re = Regex::new(r"(?i)^\s*uses\s+([^;]+);").unwrap();
2814 let class_re =
2815 Regex::new(r"(?i)^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=\s*class(?:\(([^)]*)\))?").unwrap();
2816 let proc_re = Regex::new(
2817 r"(?i)^\s*(?:class\s+)?(procedure|function)\s+([A-Za-z_][A-Za-z0-9_\.]*)\s*(?:\([^;]*\))?\s*(?::\s*[A-Za-z_][A-Za-z0-9_]*)?\s*;",
2818 )
2819 .unwrap();
2820 let mut current_class: Option<(String, String)> = None;
2821
2822 for (idx, line) in source.lines().enumerate() {
2823 let line_no = idx as i64 + 1;
2824 let trimmed = line.trim();
2825 if trimmed.is_empty() || trimmed.starts_with("//") {
2826 continue;
2827 }
2828 if let Some(cap) = unit_re.captures(line) {
2829 let name = cap.get(1).unwrap();
2830 let node = make_node(
2831 file_path,
2832 Language::Pascal,
2833 NodeKind::Module,
2834 name.as_str(),
2835 line_no,
2836 0,
2837 now,
2838 Some(trimmed.to_string()),
2839 );
2840 add_contains(nodes, edges, &node);
2841 nodes.push(node);
2842 continue;
2843 }
2844 if let Some(cap) = uses_re.captures(line) {
2845 for module in cap[1].split(',').map(str::trim).filter(|m| !m.is_empty()) {
2846 let node = make_node(
2847 file_path,
2848 Language::Pascal,
2849 NodeKind::Import,
2850 module,
2851 line_no,
2852 0,
2853 now,
2854 Some(trimmed.to_string()),
2855 );
2856 add_contains(nodes, edges, &node);
2857 refs_push(
2858 refs,
2859 &nodes[0].id,
2860 module,
2861 EdgeKind::Imports,
2862 file_path,
2863 Language::Pascal,
2864 line_no,
2865 0,
2866 );
2867 nodes.push(node);
2868 }
2869 continue;
2870 }
2871 if let Some(cap) = class_re.captures(line) {
2872 let name = cap.get(1).unwrap();
2873 let node = make_node(
2874 file_path,
2875 Language::Pascal,
2876 NodeKind::Class,
2877 name.as_str(),
2878 line_no,
2879 0,
2880 now,
2881 Some(trimmed.to_string()),
2882 );
2883 add_contains(nodes, edges, &node);
2884 if let Some(parent) = cap.get(2) {
2885 refs_push(
2886 refs,
2887 &node.id,
2888 parent.as_str().trim(),
2889 EdgeKind::Extends,
2890 file_path,
2891 Language::Pascal,
2892 line_no,
2893 0,
2894 );
2895 }
2896 current_class = Some((name.as_str().to_string(), node.id.clone()));
2897 nodes.push(node);
2898 continue;
2899 }
2900 if trimmed.eq_ignore_ascii_case("end;") {
2901 current_class = None;
2902 continue;
2903 }
2904 if let Some(cap) = proc_re.captures(line) {
2905 let raw_name = cap.get(2).unwrap().as_str();
2906 let name = raw_name.rsplit('.').next().unwrap_or(raw_name);
2907 let kind = if raw_name.contains('.') || current_class.is_some() {
2908 NodeKind::Method
2909 } else {
2910 NodeKind::Function
2911 };
2912 let mut node = make_node(
2913 file_path,
2914 Language::Pascal,
2915 kind,
2916 name,
2917 line_no,
2918 0,
2919 now,
2920 Some(trimmed.to_string()),
2921 );
2922 node.is_static = trimmed.to_lowercase().starts_with("class ");
2923 if raw_name.contains('.') {
2924 let owner = raw_name
2925 .rsplit_once('.')
2926 .map(|(owner, _)| owner)
2927 .unwrap_or(raw_name);
2928 node.qualified_name = format!("{}.{}", owner, name);
2929 } else if let Some((owner, parent_id)) = ¤t_class {
2930 node.qualified_name = format!("{}.{}", owner, name);
2931 edges.push(Edge {
2932 id: None,
2933 source: parent_id.clone(),
2934 target: node.id.clone(),
2935 kind: EdgeKind::Contains,
2936 line: None,
2937 col: None,
2938 provenance: Some("pascal".into()),
2939 });
2940 nodes.push(node);
2941 continue;
2942 }
2943 add_contains(nodes, edges, &node);
2944 nodes.push(node);
2945 }
2946 }
2947 add_call_refs(
2948 file_path,
2949 source,
2950 Language::Pascal,
2951 nodes,
2952 refs,
2953 r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(",
2954 );
2955 add_pascal_bare_call_refs(file_path, source, nodes, refs);
2956}
2957
2958fn add_pascal_bare_call_refs(
2959 file_path: &str,
2960 source: &str,
2961 nodes: &[Node],
2962 refs: &mut Vec<UnresolvedReference>,
2963) {
2964 let re = Regex::new(r"(?im)^\s*([A-Za-z_][A-Za-z0-9_]*\.[A-Za-z_][A-Za-z0-9_]*)\s*;").unwrap();
2965 for cap in re.captures_iter(source) {
2966 let name = cap.get(1).unwrap();
2967 let line = line_for(source, name.start());
2968 if let Some(caller) = nodes
2969 .iter()
2970 .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
2971 .rev()
2972 .find(|n| n.start_line <= line)
2973 {
2974 refs_push(
2975 refs,
2976 &caller.id,
2977 name.as_str(),
2978 EdgeKind::Calls,
2979 file_path,
2980 Language::Pascal,
2981 line,
2982 0,
2983 );
2984 }
2985 }
2986}
2987
2988fn extract_scala(
2989 file_path: &str,
2990 source: &str,
2991 now: i64,
2992 nodes: &mut Vec<Node>,
2993 edges: &mut Vec<Edge>,
2994 refs: &mut Vec<UnresolvedReference>,
2995) {
2996 let import_re = Regex::new(r"(?m)^\s*import\s+([A-Za-z_][A-Za-z0-9_\.\{\}, \t]*)").unwrap();
2997 for cap in import_re.captures_iter(source) {
2998 let module = cap.get(1).unwrap().as_str().trim();
2999 let node = make_node(
3000 file_path,
3001 Language::Scala,
3002 NodeKind::Import,
3003 module,
3004 line_for(source, cap.get(1).unwrap().start()),
3005 0,
3006 now,
3007 cap.get(0).map(|m| m.as_str().trim().to_string()),
3008 );
3009 add_contains(nodes, edges, &node);
3010 refs_push(
3011 refs,
3012 &nodes[0].id,
3013 module,
3014 EdgeKind::Imports,
3015 file_path,
3016 Language::Scala,
3017 node.start_line,
3018 0,
3019 );
3020 nodes.push(node);
3021 }
3022
3023 let type_re = Regex::new(
3024 r"^\s*(?:(private|protected)\s+)?(class|object|trait|enum)\s+([A-Za-z_][A-Za-z0-9_]*)([^{=]*)",
3025 )
3026 .unwrap();
3027 let def_re = Regex::new(
3028 r"^\s*(?:(private|protected)\s+)?(?:override\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)\s*\([^)]*\)\s*(?::\s*[A-Za-z_][A-Za-z0-9_\[\],\s]*)?",
3029 )
3030 .unwrap();
3031 let typealias_re = Regex::new(r"^\s*type\s+([A-Za-z_][A-Za-z0-9_]*)\s*=").unwrap();
3032 let mut type_stack: Vec<(usize, String, String)> = Vec::new();
3033 for (idx, line) in source.lines().enumerate() {
3034 let line_no = idx as i64 + 1;
3035 let trimmed = line.trim();
3036 if trimmed.is_empty() || trimmed.starts_with("//") {
3037 continue;
3038 }
3039 let indent = python_indent_width(line);
3040 while type_stack
3041 .last()
3042 .is_some_and(|(type_indent, _, _)| indent <= *type_indent)
3043 {
3044 type_stack.pop();
3045 }
3046 if let Some(cap) = type_re.captures(line) {
3047 let kind = match cap.get(2).unwrap().as_str() {
3048 "trait" => NodeKind::Trait,
3049 "enum" => NodeKind::Enum,
3050 "object" => NodeKind::Module,
3051 _ => NodeKind::Class,
3052 };
3053 let name = cap.get(3).unwrap();
3054 let mut node = make_node(
3055 file_path,
3056 Language::Scala,
3057 kind,
3058 name.as_str(),
3059 line_no,
3060 indent as i64,
3061 now,
3062 Some(trimmed.to_string()),
3063 );
3064 node.visibility = cap
3065 .get(1)
3066 .map(|m| m.as_str().to_string())
3067 .or_else(|| Some("public".to_string()));
3068 node.is_exported = node.visibility.as_deref() == Some("public");
3069 add_contains(nodes, edges, &node);
3070 add_scala_inheritance_refs(
3071 &node.id,
3072 cap.get(4).map(|m| m.as_str()).unwrap_or_default(),
3073 file_path,
3074 line_no,
3075 refs,
3076 );
3077 if !trimmed.contains('}') {
3078 type_stack.push((indent, name.as_str().to_string(), node.id.clone()));
3079 }
3080 nodes.push(node);
3081 continue;
3082 }
3083 if let Some(cap) = def_re.captures(line) {
3084 let name = cap.get(2).unwrap().as_str();
3085 let kind = if type_stack.is_empty() {
3086 NodeKind::Function
3087 } else {
3088 NodeKind::Method
3089 };
3090 let mut node = make_node(
3091 file_path,
3092 Language::Scala,
3093 kind,
3094 name,
3095 line_no,
3096 indent as i64,
3097 now,
3098 Some(trimmed.to_string()),
3099 );
3100 node.visibility = cap
3101 .get(1)
3102 .map(|m| m.as_str().to_string())
3103 .or_else(|| Some("public".to_string()));
3104 node.is_exported = node.visibility.as_deref() == Some("public");
3105 if let Some((_, owner, parent_id)) = type_stack.last() {
3106 node.qualified_name = format!("{}.{}", owner, name);
3107 edges.push(Edge {
3108 id: None,
3109 source: parent_id.clone(),
3110 target: node.id.clone(),
3111 kind: EdgeKind::Contains,
3112 line: None,
3113 col: None,
3114 provenance: Some("scala".into()),
3115 });
3116 } else {
3117 add_contains(nodes, edges, &node);
3118 }
3119 nodes.push(node);
3120 continue;
3121 }
3122 if let Some(cap) = typealias_re.captures(line) {
3123 let name = cap.get(1).unwrap();
3124 let node = make_node(
3125 file_path,
3126 Language::Scala,
3127 NodeKind::TypeAlias,
3128 name.as_str(),
3129 line_no,
3130 indent as i64,
3131 now,
3132 Some(trimmed.to_string()),
3133 );
3134 add_contains(nodes, edges, &node);
3135 nodes.push(node);
3136 }
3137 }
3138 add_call_refs(
3139 file_path,
3140 source,
3141 Language::Scala,
3142 nodes,
3143 refs,
3144 r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(",
3145 );
3146}
3147
3148fn add_scala_inheritance_refs(
3149 node_id: &str,
3150 tail: &str,
3151 file_path: &str,
3152 line: i64,
3153 refs: &mut Vec<UnresolvedReference>,
3154) {
3155 let extends_re = Regex::new(r"\bextends\s+([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
3156 let with_re = Regex::new(r"\bwith\s+([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
3157 if let Some(cap) = extends_re.captures(tail) {
3158 refs_push(
3159 refs,
3160 node_id,
3161 cap.get(1).unwrap().as_str(),
3162 EdgeKind::Extends,
3163 file_path,
3164 Language::Scala,
3165 line,
3166 0,
3167 );
3168 }
3169 for cap in with_re.captures_iter(tail) {
3170 refs_push(
3171 refs,
3172 node_id,
3173 cap.get(1).unwrap().as_str(),
3174 EdgeKind::Implements,
3175 file_path,
3176 Language::Scala,
3177 line,
3178 0,
3179 );
3180 }
3181}
3182
3183fn extract_rust(
3184 file_path: &str,
3185 source: &str,
3186 now: i64,
3187 nodes: &mut Vec<Node>,
3188 edges: &mut Vec<Edge>,
3189 refs: &mut Vec<UnresolvedReference>,
3190) {
3191 if try_extract_rust_tree_sitter(file_path, source, now, nodes, edges, refs) {
3192 return;
3193 }
3194
3195 add_regex_nodes(
3196 file_path,
3197 source,
3198 Language::Rust,
3199 now,
3200 nodes,
3201 edges,
3202 r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\s*([^{;]*)",
3203 NodeKind::Function,
3204 );
3205 add_regex_nodes(
3206 file_path,
3207 source,
3208 Language::Rust,
3209 now,
3210 nodes,
3211 edges,
3212 r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?struct\s+([A-Za-z_][A-Za-z0-9_]*)",
3213 NodeKind::Struct,
3214 );
3215 add_regex_nodes(
3216 file_path,
3217 source,
3218 Language::Rust,
3219 now,
3220 nodes,
3221 edges,
3222 r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?trait\s+([A-Za-z_][A-Za-z0-9_]*)",
3223 NodeKind::Trait,
3224 );
3225 add_regex_nodes(
3226 file_path,
3227 source,
3228 Language::Rust,
3229 now,
3230 nodes,
3231 edges,
3232 r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?enum\s+([A-Za-z_][A-Za-z0-9_]*)",
3233 NodeKind::Enum,
3234 );
3235 add_regex_nodes(
3236 file_path,
3237 source,
3238 Language::Rust,
3239 now,
3240 nodes,
3241 edges,
3242 r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?type\s+([A-Za-z_][A-Za-z0-9_]*)",
3243 NodeKind::TypeAlias,
3244 );
3245
3246 let use_re = Regex::new(r"(?m)^\s*use\s+([^;]+);").unwrap();
3247 for cap in use_re.captures_iter(source) {
3248 let full = cap.get(1).unwrap();
3249 let root = full
3250 .as_str()
3251 .split("::")
3252 .next()
3253 .unwrap_or(full.as_str())
3254 .trim_matches('{')
3255 .trim();
3256 let node = make_node(
3257 file_path,
3258 Language::Rust,
3259 NodeKind::Import,
3260 root,
3261 line_for(source, full.start()),
3262 0,
3263 now,
3264 Some(format!("use {};", full.as_str())),
3265 );
3266 add_contains(nodes, edges, &node);
3267 refs.push(unresolved(
3268 &nodes[0].id,
3269 root,
3270 EdgeKind::Imports,
3271 file_path,
3272 Language::Rust,
3273 node.start_line,
3274 ));
3275 nodes.push(node);
3276 }
3277
3278 let impl_re = Regex::new(
3279 r"(?m)^\s*impl(?:<[^>]+>)?\s+([A-Za-z_][A-Za-z0-9_:]*)\s+for\s+([A-Za-z_][A-Za-z0-9_]*)",
3280 )
3281 .unwrap();
3282 for cap in impl_re.captures_iter(source) {
3283 let trait_name = cap.get(1).unwrap().as_str().rsplit("::").next().unwrap();
3284 let type_name = cap.get(2).unwrap().as_str();
3285 if let Some(src) = nodes
3286 .iter()
3287 .find(|n| n.name == type_name && matches!(n.kind, NodeKind::Struct | NodeKind::Enum))
3288 .map(|n| n.id.clone())
3289 {
3290 refs.push(unresolved(
3291 &src,
3292 trait_name,
3293 EdgeKind::Implements,
3294 file_path,
3295 Language::Rust,
3296 line_for(source, cap.get(1).unwrap().start()),
3297 ));
3298 }
3299 }
3300 add_call_refs(
3301 file_path,
3302 source,
3303 Language::Rust,
3304 nodes,
3305 refs,
3306 r"([A-Za-z_][A-Za-z0-9_:]*)\s*\(",
3307 );
3308}
3309
3310fn extract_moonbit(
3311 file_path: &str,
3312 source: &str,
3313 now: i64,
3314 nodes: &mut Vec<Node>,
3315 edges: &mut Vec<Edge>,
3316 refs: &mut Vec<UnresolvedReference>,
3317) {
3318 if file_path.ends_with("moon.mod.json")
3319 || file_path.ends_with("moon.pkg.json")
3320 || file_path.ends_with("moon.pkg")
3321 {
3322 extract_moonbit_metadata(file_path, source, now, nodes, edges, refs);
3323 return;
3324 }
3325
3326 let source = if file_path.ends_with(".mbt.md") {
3327 extract_mbt_markdown_code_with_padding(source)
3328 } else {
3329 source.to_string()
3330 };
3331
3332 if try_extract_moonbit_tree_sitter(file_path, &source, now, nodes, edges, refs) {
3333 extract_moonbit_sol_routes(file_path, &source, now, nodes, edges, refs);
3334 return;
3335 }
3336
3337 add_regex_nodes(
3338 file_path,
3339 &source,
3340 Language::MoonBit,
3341 now,
3342 nodes,
3343 edges,
3344 r"(?m)^\s*(pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\s*([^{]*)",
3345 NodeKind::Function,
3346 );
3347 add_regex_nodes(
3348 file_path,
3349 &source,
3350 Language::MoonBit,
3351 now,
3352 nodes,
3353 edges,
3354 r"(?m)^\s*(pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*::[A-Za-z_][A-Za-z0-9_]*)\s*([^{]*)",
3355 NodeKind::Method,
3356 );
3357 add_regex_nodes(
3358 file_path,
3359 &source,
3360 Language::MoonBit,
3361 now,
3362 nodes,
3363 edges,
3364 r"(?m)^\s*(pub\s+)?struct\s+([A-Za-z_][A-Za-z0-9_]*)",
3365 NodeKind::Struct,
3366 );
3367 add_regex_nodes(
3368 file_path,
3369 &source,
3370 Language::MoonBit,
3371 now,
3372 nodes,
3373 edges,
3374 r"(?m)^\s*(pub\s+)?trait\s+([A-Za-z_][A-Za-z0-9_]*)",
3375 NodeKind::Trait,
3376 );
3377 add_regex_nodes(
3378 file_path,
3379 &source,
3380 Language::MoonBit,
3381 now,
3382 nodes,
3383 edges,
3384 r"(?m)^\s*(pub\s+)?enum\s+([A-Za-z_][A-Za-z0-9_]*)",
3385 NodeKind::Enum,
3386 );
3387 add_regex_nodes(
3388 file_path,
3389 &source,
3390 Language::MoonBit,
3391 now,
3392 nodes,
3393 edges,
3394 r"(?m)^\s*(pub\s+)?type\s+([A-Za-z_][A-Za-z0-9_]*)",
3395 NodeKind::TypeAlias,
3396 );
3397 add_regex_nodes(
3398 file_path,
3399 &source,
3400 Language::MoonBit,
3401 now,
3402 nodes,
3403 edges,
3404 r"(?m)^\s*(pub\s+)?let\s+([A-Za-z_][A-Za-z0-9_]*)",
3405 NodeKind::Variable,
3406 );
3407
3408 let import_re =
3409 Regex::new(r#"(?m)^\s*import\s+([@\w/.\-]+)(?:\s+as\s+([A-Za-z_][A-Za-z0-9_]*))?"#)
3410 .unwrap();
3411 for cap in import_re.captures_iter(&source) {
3412 let package = cap.get(1).unwrap().as_str();
3413 let name = cap.get(2).map(|m| m.as_str()).unwrap_or(package);
3414 let node = make_node(
3415 file_path,
3416 Language::MoonBit,
3417 NodeKind::Import,
3418 name,
3419 line_for(&source, cap.get(0).unwrap().start()),
3420 0,
3421 now,
3422 Some(cap.get(0).unwrap().as_str().to_string()),
3423 );
3424 add_contains(nodes, edges, &node);
3425 refs.push(unresolved(
3426 &nodes[0].id,
3427 name,
3428 EdgeKind::Imports,
3429 file_path,
3430 Language::MoonBit,
3431 node.start_line,
3432 ));
3433 nodes.push(node);
3434 }
3435 add_call_refs(
3436 file_path,
3437 &source,
3438 Language::MoonBit,
3439 nodes,
3440 refs,
3441 r"([@A-Za-z_][@A-Za-z0-9_:/]*)\s*\(",
3442 );
3443 extract_moonbit_sol_routes(file_path, &source, now, nodes, edges, refs);
3444}
3445
3446fn extract_moonbit_sol_routes(
3447 file_path: &str,
3448 source: &str,
3449 now: i64,
3450 nodes: &mut Vec<Node>,
3451 edges: &mut Vec<Edge>,
3452 refs: &mut Vec<UnresolvedReference>,
3453) {
3454 if !file_path.ends_with(".mbt") && !file_path.ends_with(".mbt.md") {
3455 return;
3456 }
3457
3458 let safe = strip_moonbit_comments_preserve_lines(source);
3459 let call_re = Regex::new(
3460 r#"@(?:sol|router)\.(route|page|api_get|api_post|api_put|api_delete|api_patch|raw_get|raw_post|raw_put|raw_delete|raw_patch)\s*\(\s*"([^"]+)"\s*,\s*([@A-Za-z_][@A-Za-z0-9_:.]*)"#,
3461 )
3462 .unwrap();
3463 let wrap_re = Regex::new(r#"@(?:sol|router)\.wrap\s*\(\s*"([^"]*)"\s*,"#).unwrap();
3464 let constructor_re = Regex::new(
3465 r#"SolRoutes::(Page|RawGet|RawPost|RawPut|RawDelete|RawPatch)\s*\([^)]*path\s*=\s*"([^"]+)"[^)]*handler\s*=\s*(?:PageHandler|RawHandler)?\(?\s*([@A-Za-z_][@A-Za-z0-9_:.]*)"#,
3466 )
3467 .unwrap();
3468 let named_page_re = Regex::new(
3469 r#"@(?:sol|router)\.page\s*\([^)]*path\s*=\s*"([^"]+)"[^)]*handler\s*=\s*([@A-Za-z_][@A-Za-z0-9_:.]*)"#,
3470 )
3471 .unwrap();
3472
3473 let mut prefix_stack: Vec<(usize, String)> = Vec::new();
3474 let mut byte_offset = 0usize;
3475 for line in safe.lines() {
3476 let indent = line.chars().take_while(|c| c.is_whitespace()).count();
3477 while prefix_stack
3478 .last()
3479 .map(|(stack_indent, _)| indent <= *stack_indent && line.trim_start().starts_with(']'))
3480 .unwrap_or(false)
3481 {
3482 prefix_stack.pop();
3483 }
3484
3485 if let Some(cap) = wrap_re.captures(line) {
3486 let prefix = cap.get(1).map(|m| m.as_str()).unwrap_or("");
3487 let full_prefix = join_route_paths(current_route_prefix(&prefix_stack), prefix);
3488 prefix_stack.push((indent, full_prefix));
3489 }
3490
3491 for cap in call_re.captures_iter(line) {
3492 let helper = cap.get(1).unwrap().as_str();
3493 let path = cap.get(2).unwrap().as_str();
3494 let handler = cap.get(3).map(|m| clean_moonbit_handler(m.as_str()));
3495 let route_path = join_route_paths(current_route_prefix(&prefix_stack), path);
3496 add_moonbit_route_node(
3497 file_path,
3498 &safe,
3499 byte_offset + cap.get(0).unwrap().start(),
3500 helper_route_method(helper),
3501 &route_path,
3502 handler.as_deref(),
3503 now,
3504 nodes,
3505 edges,
3506 refs,
3507 );
3508 }
3509
3510 for cap in named_page_re.captures_iter(line) {
3511 let path = cap.get(1).unwrap().as_str();
3512 let handler = cap.get(2).map(|m| clean_moonbit_handler(m.as_str()));
3513 let route_path = join_route_paths(current_route_prefix(&prefix_stack), path);
3514 add_moonbit_route_node(
3515 file_path,
3516 &safe,
3517 byte_offset + cap.get(0).unwrap().start(),
3518 "PAGE",
3519 &route_path,
3520 handler.as_deref(),
3521 now,
3522 nodes,
3523 edges,
3524 refs,
3525 );
3526 }
3527
3528 for cap in constructor_re.captures_iter(line) {
3529 let variant = cap.get(1).unwrap().as_str();
3530 let path = cap.get(2).unwrap().as_str();
3531 let handler = cap.get(3).map(|m| clean_moonbit_handler(m.as_str()));
3532 let route_path = join_route_paths(current_route_prefix(&prefix_stack), path);
3533 add_moonbit_route_node(
3534 file_path,
3535 &safe,
3536 byte_offset + cap.get(0).unwrap().start(),
3537 constructor_route_method(variant),
3538 &route_path,
3539 handler.as_deref(),
3540 now,
3541 nodes,
3542 edges,
3543 refs,
3544 );
3545 }
3546
3547 byte_offset += line.len() + 1;
3548 }
3549}
3550
3551fn add_moonbit_route_node(
3552 file_path: &str,
3553 source: &str,
3554 byte_offset: usize,
3555 method: &str,
3556 route_path: &str,
3557 handler: Option<&str>,
3558 now: i64,
3559 nodes: &mut Vec<Node>,
3560 edges: &mut Vec<Edge>,
3561 refs: &mut Vec<UnresolvedReference>,
3562) {
3563 let line = line_for(source, byte_offset);
3564 let name = format!("{method} {route_path}");
3565 let node = Node {
3566 id: format!("route:{file_path}:{line}:{method}:{route_path}"),
3567 kind: NodeKind::Route,
3568 name,
3569 qualified_name: format!("{file_path}::route:{method}:{route_path}"),
3570 file_path: file_path.to_string(),
3571 language: Language::MoonBit,
3572 start_line: line,
3573 end_line: line,
3574 start_column: 0,
3575 end_column: 0,
3576 docstring: None,
3577 signature: handler.map(|h| format!("{method} {route_path} -> {h}")),
3578 visibility: None,
3579 is_exported: false,
3580 is_async: false,
3581 is_static: false,
3582 is_abstract: false,
3583 updated_at: now,
3584 };
3585 add_contains(nodes, edges, &node);
3586 if let Some(handler) = handler {
3587 refs.push(unresolved(
3588 &node.id,
3589 handler,
3590 EdgeKind::References,
3591 file_path,
3592 Language::MoonBit,
3593 line,
3594 ));
3595 }
3596 nodes.push(node);
3597}
3598
3599fn add_framework_route_node(
3600 file_path: &str,
3601 language: Language,
3602 now: i64,
3603 nodes: &mut Vec<Node>,
3604 edges: &mut Vec<Edge>,
3605 refs: &mut Vec<UnresolvedReference>,
3606 method: &str,
3607 route_path: &str,
3608 handler: Option<&str>,
3609 line: i64,
3610 signature: Option<String>,
3611 provenance: &str,
3612) {
3613 let route_path = normalize_route_path(route_path);
3614 let method = method.to_ascii_uppercase();
3615 let name = format!("{method} {route_path}");
3616 let mut node = make_node(
3617 file_path,
3618 language,
3619 NodeKind::Route,
3620 &name,
3621 line,
3622 0,
3623 now,
3624 signature.or_else(|| handler.map(|h| format!("{method} {route_path} -> {h}"))),
3625 );
3626 node.id = format!("route:{file_path}:{line}:{method}:{route_path}");
3627 node.qualified_name = format!("{file_path}::route:{method}:{route_path}");
3628 add_contains(nodes, edges, &node);
3629 if let Some(edge) = edges.last_mut() {
3630 edge.provenance = Some(provenance.to_string());
3631 }
3632 if let Some(handler) = handler {
3633 refs_push(
3634 refs,
3635 &node.id,
3636 handler.trim(),
3637 EdgeKind::References,
3638 file_path,
3639 language,
3640 line,
3641 0,
3642 );
3643 }
3644 nodes.push(node);
3645}
3646
3647fn extract_web_file_routes(
3648 file_path: &str,
3649 language: Language,
3650 now: i64,
3651 nodes: &mut Vec<Node>,
3652 edges: &mut Vec<Edge>,
3653 refs: &mut Vec<UnresolvedReference>,
3654) {
3655 if let Some(route_path) = next_app_api_route_path(file_path) {
3656 for method in route_exported_methods(nodes) {
3657 add_framework_route_node(
3658 file_path,
3659 language,
3660 now,
3661 nodes,
3662 edges,
3663 refs,
3664 &method,
3665 &route_path,
3666 Some(&method),
3667 1,
3668 Some(format!("{method} {route_path}")),
3669 "file-route",
3670 );
3671 }
3672 return;
3673 }
3674
3675 if let Some(route_path) = file_based_page_route_path(file_path) {
3676 let handler = default_route_handler(nodes).map(str::to_string);
3677 add_framework_route_node(
3678 file_path,
3679 language,
3680 now,
3681 nodes,
3682 edges,
3683 refs,
3684 "PAGE",
3685 &route_path,
3686 handler.as_deref(),
3687 1,
3688 Some(format!("PAGE {route_path}")),
3689 "file-route",
3690 );
3691 }
3692}
3693
3694fn route_exported_methods(nodes: &[Node]) -> Vec<String> {
3695 let mut methods: Vec<String> = nodes
3696 .iter()
3697 .filter(|node| node.kind == NodeKind::Function && node.is_exported)
3698 .filter(|node| {
3699 matches!(
3700 node.name.as_str(),
3701 "GET" | "POST" | "PUT" | "PATCH" | "DELETE"
3702 )
3703 })
3704 .map(|node| node.name.clone())
3705 .collect();
3706 methods.sort();
3707 methods.dedup();
3708 methods
3709}
3710
3711fn default_route_handler(nodes: &[Node]) -> Option<&str> {
3712 nodes
3713 .iter()
3714 .find(|node| node.kind == NodeKind::Function && node.name == "default")
3715 .map(|node| node.name.as_str())
3716}
3717
3718fn next_app_api_route_path(file_path: &str) -> Option<String> {
3719 let path = file_path.strip_prefix("src/").unwrap_or(file_path);
3720 let route = path
3721 .strip_prefix("app/")
3722 .and_then(|p| p.strip_suffix("/route.ts"))
3723 .or_else(|| {
3724 path.strip_prefix("app/")
3725 .and_then(|p| p.strip_suffix("/route.js"))
3726 })?;
3727 Some(file_route_segments_to_path(route))
3728}
3729
3730fn file_based_page_route_path(file_path: &str) -> Option<String> {
3731 let path = file_path.strip_prefix("src/").unwrap_or(file_path);
3732 let route = path
3733 .strip_prefix("pages/")
3734 .and_then(strip_page_extension)
3735 .or_else(|| path.strip_prefix("routes/").and_then(strip_page_extension))?;
3736 Some(file_route_segments_to_path(route))
3737}
3738
/// Removes a recognised page-file extension from the end of `path`.
///
/// Extensions are tried in the listed order and the first match wins.
/// Returns `None` when `path` ends in none of them.
fn strip_page_extension(path: &str) -> Option<&str> {
    const PAGE_EXTENSIONS: [&str; 6] = [".tsx", ".jsx", ".ts", ".js", ".svelte", ".vue"];
    PAGE_EXTENSIONS
        .iter()
        .find_map(|&extension| path.strip_suffix(extension))
}
3747
3748fn file_route_segments_to_path(route: &str) -> String {
3749 let segments: Vec<String> = route
3750 .split('/')
3751 .filter(|segment| {
3752 !segment.is_empty()
3753 && *segment != "page"
3754 && *segment != "index"
3755 && !(segment.starts_with('(') && segment.ends_with(')'))
3756 })
3757 .map(|segment| {
3758 if segment.starts_with("[...") && segment.ends_with(']') {
3759 format!("*{}", &segment[4..segment.len() - 1])
3760 } else if segment.starts_with('[') && segment.ends_with(']') {
3761 format!(":{}", &segment[1..segment.len() - 1])
3762 } else {
3763 segment.to_string()
3764 }
3765 })
3766 .collect();
3767 normalize_route_path(&segments.join("/"))
3768}
3769
/// Extracts the contents of the first quoted string in `args`.
///
/// The first `"` or `'` found opens the string, and the next occurrence of
/// that same quote character closes it. Escape sequences are not interpreted.
/// Returns `None` when there is no quote or the string is never closed.
fn first_quoted_arg(args: &str) -> Option<&str> {
    let (open_at, quote) = args
        .char_indices()
        .find(|&(_, ch)| ch == '"' || ch == '\'')?;
    let after_open = &args[open_at + quote.len_utf8()..];
    after_open
        .find(quote)
        .map(|close_at| &after_open[..close_at])
}
3777
/// Maps a route-helper function name to the method label used on route nodes.
///
/// `api_*` helpers map to the bare HTTP verb and `raw_*` helpers to
/// `RAW <VERB>`. Everything else, including `route`, `page` and unknown
/// names, maps to `PAGE`.
fn helper_route_method(helper: &str) -> &'static str {
    const VERB_HELPERS: [(&str, &str); 10] = [
        ("api_get", "GET"),
        ("api_post", "POST"),
        ("api_put", "PUT"),
        ("api_delete", "DELETE"),
        ("api_patch", "PATCH"),
        ("raw_get", "RAW GET"),
        ("raw_post", "RAW POST"),
        ("raw_put", "RAW PUT"),
        ("raw_delete", "RAW DELETE"),
        ("raw_patch", "RAW PATCH"),
    ];

    VERB_HELPERS
        .iter()
        .find(|(known, _)| *known == helper)
        .map_or("PAGE", |&(_, method)| method)
}
3794
/// Maps a route constructor variant name to the method label used on route nodes.
///
/// Only the `Raw*` variants have a dedicated label (`RAW <VERB>`); every
/// other variant name is reported as `PAGE`.
fn constructor_route_method(variant: &str) -> &'static str {
    const RAW_VARIANTS: [(&str, &str); 5] = [
        ("RawGet", "RAW GET"),
        ("RawPost", "RAW POST"),
        ("RawPut", "RAW PUT"),
        ("RawDelete", "RAW DELETE"),
        ("RawPatch", "RAW PATCH"),
    ];

    for (known, method) in RAW_VARIANTS {
        if known == variant {
            return method;
        }
    }
    "PAGE"
}
3805
/// Returns the prefix stored in the top entry of `prefix_stack`.
///
/// An empty stack yields the empty string. The `usize` stored next to each
/// prefix is ignored here.
fn current_route_prefix(prefix_stack: &[(usize, String)]) -> &str {
    match prefix_stack.last() {
        Some((_, prefix)) => prefix.as_str(),
        None => "",
    }
}
3812
3813fn join_route_paths(prefix: &str, path: &str) -> String {
3814 if prefix.is_empty() || prefix == "/" {
3815 return normalize_route_path(path);
3816 }
3817 let path = normalize_route_path(path);
3818 if path == "/" {
3819 return normalize_route_path(prefix);
3820 }
3821 format!(
3822 "{}/{}",
3823 prefix.trim_end_matches('/'),
3824 path.trim_start_matches('/')
3825 )
3826}
3827
/// Puts a route path into canonical form.
///
/// The empty path becomes `/`. Otherwise every backslash is replaced by a
/// forward slash and a leading `/` is added when missing. Repeated or
/// trailing slashes are left untouched.
fn normalize_route_path(path: &str) -> String {
    if path.is_empty() {
        return String::from("/");
    }

    let unified = path.replace('\\', "/");
    if unified.starts_with('/') {
        return unified;
    }

    let mut rooted = String::with_capacity(unified.len() + 1);
    rooted.push('/');
    rooted.push_str(&unified);
    rooted
}
3839
/// Reduces a handler expression to its bare final identifier.
///
/// Surrounding whitespace and leading `@` characters are dropped, only the
/// text after the last `.` or `:` is kept, and any `)` characters at either
/// end of that remainder are trimmed.
fn clean_moonbit_handler(handler: &str) -> String {
    let unprefixed = handler.trim().trim_start_matches('@');
    let last_segment = match unprefixed.rfind(['.', ':']) {
        // Both separators are single-byte ASCII, so `+ 1` stays on a char boundary.
        Some(separator_at) => &unprefixed[separator_at + 1..],
        None => unprefixed,
    };
    last_segment.trim_matches(')').to_owned()
}
3850
3851fn extract_moonbit_metadata(
3852 file_path: &str,
3853 source: &str,
3854 now: i64,
3855 nodes: &mut Vec<Node>,
3856 edges: &mut Vec<Edge>,
3857 refs: &mut Vec<UnresolvedReference>,
3858) {
3859 let Ok(json) = serde_json::from_str::<serde_json::Value>(source) else {
3860 return;
3861 };
3862 if file_path.ends_with("moon.mod.json") {
3863 if let Some(name) = json.get("name").and_then(|v| v.as_str()) {
3864 let node = make_node(
3865 file_path,
3866 Language::MoonBit,
3867 NodeKind::Module,
3868 name,
3869 1,
3870 0,
3871 now,
3872 Some("moon.mod.json".into()),
3873 );
3874 add_contains(nodes, edges, &node);
3875 nodes.push(node);
3876 }
3877 return;
3878 }
3879
3880 let package_name = json
3881 .get("name")
3882 .and_then(|v| v.as_str())
3883 .or_else(|| file_path.rsplit('/').nth(1))
3884 .unwrap_or("moonbit-package");
3885 let node = make_node(
3886 file_path,
3887 Language::MoonBit,
3888 NodeKind::Module,
3889 package_name,
3890 1,
3891 0,
3892 now,
3893 Some(file_path.rsplit('/').next().unwrap_or("moon.pkg").into()),
3894 );
3895 add_contains(nodes, edges, &node);
3896 let package_node_id = node.id.clone();
3897 nodes.push(node);
3898
3899 if let Some(imports) = json.get("import").or_else(|| json.get("imports")) {
3900 if let Some(obj) = imports.as_object() {
3901 for (alias, value) in obj {
3902 let target = value.as_str().unwrap_or(alias);
3903 let import_node = make_node(
3904 file_path,
3905 Language::MoonBit,
3906 NodeKind::Import,
3907 alias,
3908 1,
3909 0,
3910 now,
3911 Some(target.to_string()),
3912 );
3913 add_contains(nodes, edges, &import_node);
3914 refs.push(unresolved(
3915 &package_node_id,
3916 alias,
3917 EdgeKind::Imports,
3918 file_path,
3919 Language::MoonBit,
3920 1,
3921 ));
3922 nodes.push(import_node);
3923 }
3924 }
3925 }
3926}
3927
3928fn try_extract_rust_tree_sitter(
3929 file_path: &str,
3930 source: &str,
3931 now: i64,
3932 nodes: &mut Vec<Node>,
3933 edges: &mut Vec<Edge>,
3934 refs: &mut Vec<UnresolvedReference>,
3935) -> bool {
3936 let mut parser = Parser::new();
3937 if parser
3938 .set_language(&tree_sitter_rust::LANGUAGE.into())
3939 .is_err()
3940 {
3941 return false;
3942 }
3943 let Some(tree) = parser.parse(source, None) else {
3944 return false;
3945 };
3946 if tree.root_node().has_error() {
3947 return false;
3948 }
3949
3950 let root = tree.root_node();
3951 let mut stack = Vec::new();
3952 collect_rust_nodes(file_path, source, root, now, nodes, edges, refs, &mut stack);
3953 collect_rust_refs(file_path, source, root, nodes, refs);
3954 true
3955}
3956
3957fn collect_rust_nodes(
3958 file_path: &str,
3959 source: &str,
3960 node: SyntaxNode,
3961 now: i64,
3962 nodes: &mut Vec<Node>,
3963 edges: &mut Vec<Edge>,
3964 refs: &mut Vec<UnresolvedReference>,
3965 stack: &mut Vec<String>,
3966) {
3967 let kind = match node.kind() {
3968 "function_item" => {
3969 if rust_receiver_type(node, source).is_some() {
3970 Some(NodeKind::Method)
3971 } else {
3972 Some(NodeKind::Function)
3973 }
3974 }
3975 "struct_item" => Some(NodeKind::Struct),
3976 "trait_item" => Some(NodeKind::Trait),
3977 "enum_item" => Some(NodeKind::Enum),
3978 "enum_variant" => Some(NodeKind::EnumMember),
3979 "type_item" => Some(NodeKind::TypeAlias),
3980 "const_item" => Some(NodeKind::Constant),
3981 "static_item" => Some(NodeKind::Variable),
3982 "let_declaration" => Some(NodeKind::Variable),
3983 "field_declaration" => Some(NodeKind::Field),
3984 "function_signature_item" => Some(NodeKind::Method),
3985 "use_declaration" => Some(NodeKind::Import),
3986 "mod_item" => Some(NodeKind::Module),
3987 _ => None,
3988 };
3989
3990 let mut pushed = false;
3991 if let Some(kind) = kind {
3992 if let Some(name) = rust_node_name(node, source, kind) {
3993 let signature = Some(
3994 node_text(node, source)
3995 .lines()
3996 .next()
3997 .unwrap_or("")
3998 .trim()
3999 .to_string(),
4000 );
4001 let mut out =
4002 make_node_span(file_path, Language::Rust, kind, &name, node, now, signature);
4003 out.is_exported = rust_is_public(node, source);
4004 out.visibility = if out.is_exported {
4005 Some("public".into())
4006 } else if matches!(
4007 kind,
4008 NodeKind::Function
4009 | NodeKind::Method
4010 | NodeKind::Struct
4011 | NodeKind::Trait
4012 | NodeKind::Enum
4013 | NodeKind::TypeAlias
4014 ) {
4015 Some("private".into())
4016 } else {
4017 None
4018 };
4019 out.is_async = node_text(node, source).trim_start().starts_with("async ")
4020 || node_text(node, source).contains(" async fn ");
4021 if kind == NodeKind::Method {
4022 if let Some(owner) = rust_receiver_type(node, source) {
4023 out.qualified_name = format!("{owner}::{name}");
4024 }
4025 }
4026 add_contains_from_stack(nodes, edges, stack, &out, "tree-sitter");
4027 let id = out.id.clone();
4028 nodes.push(out);
4029 if matches!(
4030 kind,
4031 NodeKind::Struct
4032 | NodeKind::Trait
4033 | NodeKind::Enum
4034 | NodeKind::Module
4035 | NodeKind::Function
4036 | NodeKind::Method
4037 ) {
4038 stack.push(id);
4039 pushed = true;
4040 }
4041 }
4042 }
4043
4044 if node.kind() == "impl_item" {
4045 if let Some((trait_name, type_name)) = rust_impl_trait_for_type(node, source) {
4046 if let Some(type_node) = nodes.iter().find(|n| {
4047 n.name == type_name
4048 && matches!(n.kind, NodeKind::Struct | NodeKind::Enum | NodeKind::Trait)
4049 }) {
4050 refs_push(
4051 refs,
4052 &type_node.id,
4053 &trait_name,
4054 EdgeKind::Implements,
4055 file_path,
4056 Language::Rust,
4057 node.start_position().row as i64 + 1,
4058 node.start_position().column as i64,
4059 );
4060 }
4061 }
4062 }
4063
4064 for child in named_children(node) {
4065 collect_rust_nodes(file_path, source, child, now, nodes, edges, refs, stack);
4066 }
4067
4068 if pushed {
4069 stack.pop();
4070 }
4071}
4072
4073fn collect_rust_refs(
4074 file_path: &str,
4075 source: &str,
4076 node: SyntaxNode,
4077 nodes: &[Node],
4078 refs: &mut Vec<UnresolvedReference>,
4079) {
4080 match node.kind() {
4081 "use_declaration" => {
4082 if let Some(name) = rust_import_root(node, source) {
4083 refs_push(
4084 refs,
4085 &format!("file:{file_path}"),
4086 &name,
4087 EdgeKind::Imports,
4088 file_path,
4089 Language::Rust,
4090 node.start_position().row as i64 + 1,
4091 node.start_position().column as i64,
4092 );
4093 }
4094 }
4095 "call_expression" => {
4096 if let Some(function) = node.child_by_field_name("function") {
4097 if let Some(name) = callable_name(function, source) {
4098 if let Some(caller) =
4099 enclosing_callable(nodes, node.start_position().row as i64 + 1)
4100 {
4101 refs_push(
4102 refs,
4103 &caller.id,
4104 &name,
4105 EdgeKind::Calls,
4106 file_path,
4107 Language::Rust,
4108 node.start_position().row as i64 + 1,
4109 node.start_position().column as i64,
4110 );
4111 }
4112 }
4113 }
4114 }
4115 _ => {}
4116 }
4117
4118 for child in named_children(node) {
4119 collect_rust_refs(file_path, source, child, nodes, refs);
4120 }
4121}
4122
4123fn try_extract_moonbit_tree_sitter(
4124 file_path: &str,
4125 source: &str,
4126 now: i64,
4127 nodes: &mut Vec<Node>,
4128 edges: &mut Vec<Edge>,
4129 refs: &mut Vec<UnresolvedReference>,
4130) -> bool {
4131 let mut parser = Parser::new();
4132 if parser
4133 .set_language(&tree_sitter_moonbit::LANGUAGE.into())
4134 .is_err()
4135 {
4136 return false;
4137 }
4138 let Some(tree) = parser.parse(source, None) else {
4139 return false;
4140 };
4141 if tree.root_node().has_error() {
4142 return false;
4143 }
4144
4145 let root = tree.root_node();
4146 let mut stack = Vec::new();
4147 collect_moonbit_nodes(file_path, source, root, now, nodes, edges, &mut stack);
4148 collect_moonbit_refs(file_path, source, root, nodes, refs);
4149 true
4150}
4151
4152fn collect_moonbit_nodes(
4153 file_path: &str,
4154 source: &str,
4155 node: SyntaxNode,
4156 now: i64,
4157 nodes: &mut Vec<Node>,
4158 edges: &mut Vec<Edge>,
4159 stack: &mut Vec<String>,
4160) {
4161 let kind = match node.kind() {
4162 "function_definition" => Some(NodeKind::Function),
4163 "impl_definition" => Some(NodeKind::Method),
4164 "struct_definition" | "tuple_struct_definition" => Some(NodeKind::Struct),
4165 "trait_definition" => Some(NodeKind::Trait),
4166 "trait_method_declaration" => Some(NodeKind::Method),
4167 "enum_definition" => Some(NodeKind::Enum),
4168 "enum_constructor" => Some(NodeKind::EnumMember),
4169 "type_alias_definition" | "type_definition" => Some(NodeKind::TypeAlias),
4170 "const_definition" => Some(NodeKind::Constant),
4171 "import_declaration" => Some(NodeKind::Import),
4172 "package_declaration" => Some(NodeKind::Module),
4173 _ => None,
4174 };
4175
4176 let mut pushed = false;
4177 if let Some(kind) = kind {
4178 if let Some(name) = moonbit_node_name(node, source, kind) {
4179 let signature = Some(
4180 node_text(node, source)
4181 .lines()
4182 .next()
4183 .unwrap_or("")
4184 .trim()
4185 .to_string(),
4186 );
4187 let mut out = make_node_span(
4188 file_path,
4189 Language::MoonBit,
4190 kind,
4191 &name,
4192 node,
4193 now,
4194 signature,
4195 );
4196 out.is_exported = moonbit_is_public(node, source);
4197 out.visibility = if out.is_exported {
4198 Some("public".into())
4199 } else {
4200 None
4201 };
4202 if kind == NodeKind::Method {
4203 if let Some(owner) = moonbit_impl_owner(node, source) {
4204 out.qualified_name = format!("{owner}::{name}");
4205 }
4206 }
4207 add_contains_from_stack(nodes, edges, stack, &out, "tree-sitter");
4208 let id = out.id.clone();
4209 nodes.push(out);
4210 if matches!(
4211 kind,
4212 NodeKind::Struct
4213 | NodeKind::Trait
4214 | NodeKind::Enum
4215 | NodeKind::Module
4216 | NodeKind::Function
4217 | NodeKind::Method
4218 ) {
4219 stack.push(id);
4220 pushed = true;
4221 }
4222 }
4223 }
4224
4225 for child in named_children(node) {
4226 collect_moonbit_nodes(file_path, source, child, now, nodes, edges, stack);
4227 }
4228
4229 if pushed {
4230 stack.pop();
4231 }
4232}
4233
4234fn collect_moonbit_refs(
4235 file_path: &str,
4236 source: &str,
4237 node: SyntaxNode,
4238 nodes: &[Node],
4239 refs: &mut Vec<UnresolvedReference>,
4240) {
4241 match node.kind() {
4242 "import_declaration" => {
4243 for child in named_children(node) {
4244 if child.kind() == "import_item" {
4245 if let Some(name) = moonbit_import_name(child, source) {
4246 refs_push(
4247 refs,
4248 &format!("file:{file_path}"),
4249 &name,
4250 EdgeKind::Imports,
4251 file_path,
4252 Language::MoonBit,
4253 child.start_position().row as i64 + 1,
4254 child.start_position().column as i64,
4255 );
4256 }
4257 }
4258 }
4259 }
4260 "apply_expression" | "dot_apply_expression" | "dot_dot_apply_expression" => {
4261 if let Some(name) = moonbit_call_name(node, source) {
4262 if let Some(caller) =
4263 enclosing_callable(nodes, node.start_position().row as i64 + 1)
4264 {
4265 refs_push(
4266 refs,
4267 &caller.id,
4268 &name,
4269 EdgeKind::Calls,
4270 file_path,
4271 Language::MoonBit,
4272 node.start_position().row as i64 + 1,
4273 node.start_position().column as i64,
4274 );
4275 }
4276 }
4277 }
4278 _ => {}
4279 }
4280
4281 for child in named_children(node) {
4282 collect_moonbit_refs(file_path, source, child, nodes, refs);
4283 }
4284}
4285
4286fn extract_liquid_vue_svelte(
4287 file_path: &str,
4288 source: &str,
4289 language: Language,
4290 now: i64,
4291 nodes: &mut Vec<Node>,
4292 edges: &mut Vec<Edge>,
4293 refs: &mut Vec<UnresolvedReference>,
4294) {
4295 match language {
4296 Language::Liquid => extract_liquid(file_path, source, now, nodes, edges, refs),
4297 Language::Vue | Language::Svelte => {
4298 extract_component_file(file_path, source, language, now, nodes, edges);
4299 extract_component_script_symbols(file_path, source, language, now, nodes, edges, refs);
4300 extract_web_file_routes(file_path, language, now, nodes, edges, refs);
4301 match language {
4302 Language::Vue => extract_vue_template_components(file_path, source, language, refs),
4303 Language::Svelte => extract_svelte_template_refs(file_path, source, language, refs),
4304 _ => {}
4305 }
4306 }
4307 _ => {}
4308 }
4309}
4310
4311fn extract_liquid(
4312 file_path: &str,
4313 source: &str,
4314 now: i64,
4315 nodes: &mut Vec<Node>,
4316 edges: &mut Vec<Edge>,
4317 refs: &mut Vec<UnresolvedReference>,
4318) {
4319 let snippet_re = Regex::new(r#"\{%-?\s*(render|include)\s+['"]([^'"]+)['"]"#).unwrap();
4320 for cap in snippet_re.captures_iter(source) {
4321 let full = cap.get(0).unwrap();
4322 let tag = cap.get(1).unwrap().as_str();
4323 let name = cap.get(2).unwrap();
4324 let line = line_for(source, full.start());
4325 add_liquid_reference_node(
4326 file_path,
4327 now,
4328 nodes,
4329 edges,
4330 refs,
4331 name.as_str(),
4332 &format!("{}:{}", tag, name.as_str()),
4333 &format!("snippets/{}.liquid", name.as_str()),
4334 line,
4335 full.as_str(),
4336 );
4337 }
4338
4339 let section_re = Regex::new(r#"\{%-?\s*section\s+['"]([^'"]+)['"]"#).unwrap();
4340 for cap in section_re.captures_iter(source) {
4341 let full = cap.get(0).unwrap();
4342 let name = cap.get(1).unwrap();
4343 let line = line_for(source, full.start());
4344 add_liquid_reference_node(
4345 file_path,
4346 now,
4347 nodes,
4348 edges,
4349 refs,
4350 name.as_str(),
4351 &format!("section:{}", name.as_str()),
4352 &format!("sections/{}.liquid", name.as_str()),
4353 line,
4354 full.as_str(),
4355 );
4356 }
4357
4358 let schema_re =
4359 Regex::new(r"(?s)\{%-?\s*schema\s*-?%\}(.*?)\{%-?\s*endschema\s*-?%\}").unwrap();
4360 for cap in schema_re.captures_iter(source) {
4361 let full = cap.get(0).unwrap();
4362 let body = cap.get(1).map(|m| m.as_str()).unwrap_or_default();
4363 let line = line_for(source, full.start());
4364 let mut node = make_node(
4365 file_path,
4366 Language::Liquid,
4367 NodeKind::Constant,
4368 "schema",
4369 line,
4370 0,
4371 now,
4372 Some(
4373 full.as_str()
4374 .lines()
4375 .next()
4376 .unwrap_or("{% schema %}")
4377 .trim()
4378 .to_string(),
4379 ),
4380 );
4381 node.qualified_name = format!("{}::schema", file_path);
4382 node.docstring = Some(body.trim().chars().take(200).collect());
4383 add_contains(nodes, edges, &node);
4384 nodes.push(node);
4385 }
4386
4387 let assign_re = Regex::new(r"\{%-?\s*assign\s+([A-Za-z_][A-Za-z0-9_]*)\s*=").unwrap();
4388 for cap in assign_re.captures_iter(source) {
4389 let full = cap.get(0).unwrap();
4390 let name = cap.get(1).unwrap();
4391 let mut node = make_node(
4392 file_path,
4393 Language::Liquid,
4394 NodeKind::Variable,
4395 name.as_str(),
4396 line_for(source, name.start()),
4397 0,
4398 now,
4399 Some(full.as_str().trim().to_string()),
4400 );
4401 node.qualified_name = format!("{}::{}", file_path, name.as_str());
4402 add_contains(nodes, edges, &node);
4403 nodes.push(node);
4404 }
4405}
4406
4407fn add_liquid_reference_node(
4408 file_path: &str,
4409 now: i64,
4410 nodes: &mut Vec<Node>,
4411 edges: &mut Vec<Edge>,
4412 refs: &mut Vec<UnresolvedReference>,
4413 name: &str,
4414 qualified_suffix: &str,
4415 reference_name: &str,
4416 line: i64,
4417 signature: &str,
4418) {
4419 let import_node = make_node(
4420 file_path,
4421 Language::Liquid,
4422 NodeKind::Import,
4423 name,
4424 line,
4425 0,
4426 now,
4427 Some(signature.trim().to_string()),
4428 );
4429 add_contains(nodes, edges, &import_node);
4430 nodes.push(import_node);
4431
4432 let mut component_node = make_node(
4433 file_path,
4434 Language::Liquid,
4435 NodeKind::Component,
4436 name,
4437 line,
4438 0,
4439 now,
4440 Some(signature.trim().to_string()),
4441 );
4442 component_node.qualified_name = format!("{}::{}", file_path, qualified_suffix);
4443 add_contains(nodes, edges, &component_node);
4444 nodes.push(component_node);
4445
4446 refs.push(unresolved(
4447 &nodes[0].id,
4448 reference_name,
4449 EdgeKind::References,
4450 file_path,
4451 Language::Liquid,
4452 line,
4453 ));
4454}
4455
4456fn extract_component_file(
4457 file_path: &str,
4458 source: &str,
4459 language: Language,
4460 now: i64,
4461 nodes: &mut Vec<Node>,
4462 edges: &mut Vec<Edge>,
4463) {
4464 let name = component_name_from_path(file_path, language);
4465 let mut node = make_node(
4466 file_path,
4467 language,
4468 NodeKind::Component,
4469 &name,
4470 1,
4471 0,
4472 now,
4473 None,
4474 );
4475 node.qualified_name = format!("{}::{}", file_path, name);
4476 node.end_line = source.lines().count().max(1) as i64;
4477 node.is_exported = true;
4478 node.visibility = Some("public".to_string());
4479 add_contains(nodes, edges, &node);
4480 nodes.push(node);
4481}
4482
4483fn component_name_from_path(file_path: &str, language: Language) -> String {
4484 let file_name = file_path.rsplit('/').next().unwrap_or(file_path);
4485 match language {
4486 Language::Vue => file_name.strip_suffix(".vue").unwrap_or(file_name),
4487 Language::Svelte => file_name.strip_suffix(".svelte").unwrap_or(file_name),
4488 _ => file_name,
4489 }
4490 .to_string()
4491}
4492
4493fn extract_component_script_symbols(
4494 file_path: &str,
4495 source: &str,
4496 language: Language,
4497 now: i64,
4498 nodes: &mut Vec<Node>,
4499 edges: &mut Vec<Edge>,
4500 refs: &mut Vec<UnresolvedReference>,
4501) {
4502 for block in script_blocks(source) {
4503 let before_nodes = nodes.len();
4504 let before_refs = refs.len();
4505 extract_typescript_javascript(file_path, &block.content, language, now, nodes, edges, refs);
4506 for node in nodes.iter_mut().skip(before_nodes) {
4507 node.start_line += block.start_line - 1;
4508 node.end_line += block.start_line - 1;
4509 }
4510 for reference in refs.iter_mut().skip(before_refs) {
4511 reference.line += block.start_line - 1;
4512 }
4513 }
4514}
4515
/// The inner text of one `<script>` element plus where it starts in the host file.
struct ScriptBlock {
    /// Text captured between the opening and closing script tags.
    content: String,
    /// Line of the host file on which `content` begins, as computed by
    /// `line_for` (presumably 1-based, since callers use `start_line - 1` as
    /// an offset — confirm against `line_for`).
    start_line: i64,
}
4520
4521fn script_blocks(source: &str) -> Vec<ScriptBlock> {
4522 let re = Regex::new(r"(?is)<script(?:\s[^>]*)?>(.*?)</script>").unwrap();
4523 re.captures_iter(source)
4524 .filter_map(|cap| {
4525 let content = cap.get(1)?;
4526 Some(ScriptBlock {
4527 content: content.as_str().to_string(),
4528 start_line: line_for(source, content.start()),
4529 })
4530 })
4531 .collect()
4532}
4533
4534fn extract_vue_template_components(
4535 file_path: &str,
4536 source: &str,
4537 language: Language,
4538 refs: &mut Vec<UnresolvedReference>,
4539) {
4540 let tag_re = Regex::new(r"<([A-Z][A-Za-z0-9_$]*)\b").unwrap();
4541 for cap in tag_re.captures_iter(source) {
4542 let tag = cap.get(1).unwrap();
4543 refs.push(unresolved(
4544 &format!("file:{}", file_path),
4545 tag.as_str(),
4546 EdgeKind::References,
4547 file_path,
4548 language,
4549 line_for(source, tag.start()),
4550 ));
4551 }
4552}
4553
4554fn extract_svelte_template_refs(
4555 file_path: &str,
4556 source: &str,
4557 language: Language,
4558 refs: &mut Vec<UnresolvedReference>,
4559) {
4560 extract_vue_template_components(file_path, source, language, refs);
4561 let expr_re = Regex::new(r"\{([^}#/:@][^}]*)\}").unwrap();
4562 let call_re = Regex::new(r"\b([A-Za-z_$][A-Za-z0-9_$.]*)\s*\(").unwrap();
4563 let runes = [
4564 "$props",
4565 "$state",
4566 "$derived",
4567 "$effect",
4568 "$bindable",
4569 "$inspect",
4570 "$host",
4571 "$snippet",
4572 ];
4573 for expr in expr_re.captures_iter(source) {
4574 let Some(body) = expr.get(1) else {
4575 continue;
4576 };
4577 for call in call_re.captures_iter(body.as_str()) {
4578 let name = call.get(1).unwrap().as_str();
4579 if runes.contains(&name) || matches!(name, "if" | "else" | "each" | "await") {
4580 continue;
4581 }
4582 refs.push(unresolved(
4583 &format!("file:{}", file_path),
4584 name,
4585 EdgeKind::Calls,
4586 file_path,
4587 language,
4588 line_for(source, body.start() + call.get(1).unwrap().start()),
4589 ));
4590 }
4591 }
4592}
4593
4594fn extract_generic(
4595 file_path: &str,
4596 source: &str,
4597 language: Language,
4598 now: i64,
4599 nodes: &mut Vec<Node>,
4600 edges: &mut Vec<Edge>,
4601 refs: &mut Vec<UnresolvedReference>,
4602) {
4603 add_regex_nodes(
4604 file_path,
4605 source,
4606 language,
4607 now,
4608 nodes,
4609 edges,
4610 r"(?m)^\s*(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_$][A-Za-z0-9_$]*)",
4611 NodeKind::Function,
4612 );
4613 add_regex_nodes(
4614 file_path,
4615 source,
4616 language,
4617 now,
4618 nodes,
4619 edges,
4620 r"(?m)^\s*(?:export\s+)?class\s+([A-Za-z_$][A-Za-z0-9_$]*)",
4621 NodeKind::Class,
4622 );
4623 add_call_refs(
4624 file_path,
4625 source,
4626 language,
4627 nodes,
4628 refs,
4629 r"([A-Za-z_$][A-Za-z0-9_$.]*)\s*\(",
4630 );
4631}
4632
4633fn add_regex_nodes(
4634 file_path: &str,
4635 source: &str,
4636 language: Language,
4637 now: i64,
4638 nodes: &mut Vec<Node>,
4639 edges: &mut Vec<Edge>,
4640 pattern: &str,
4641 kind: NodeKind,
4642) {
4643 let re = Regex::new(pattern).unwrap();
4644 for cap in re.captures_iter(source) {
4645 let Some(name_match) = cap.get(2).or_else(|| cap.get(1)) else {
4646 continue;
4647 };
4648 let mut name = name_match.as_str().to_string();
4649 if kind == NodeKind::Method && name.contains("::") {
4650 name = name.rsplit("::").next().unwrap_or(&name).to_string();
4651 }
4652 let signature = cap.get(0).map(|m| m.as_str().trim().to_string());
4653 let line = line_for(source, name_match.start());
4654 let mut node = make_node(file_path, language, kind, &name, line, 0, now, signature);
4655 node.is_exported = cap
4656 .get(1)
4657 .map(|m| m.as_str().contains("pub") || m.as_str().contains("export"))
4658 .unwrap_or(false);
4659 node.visibility = if node.is_exported {
4660 Some("public".into())
4661 } else {
4662 None
4663 };
4664 add_contains(nodes, edges, &node);
4665 nodes.push(node);
4666 }
4667}
4668
4669fn add_call_refs(
4670 file_path: &str,
4671 source: &str,
4672 language: Language,
4673 nodes: &[Node],
4674 refs: &mut Vec<UnresolvedReference>,
4675 pattern: &str,
4676) {
4677 let re = Regex::new(pattern).unwrap();
4678 let keywords = [
4679 "if", "for", "while", "match", "return", "fn", "test", "inspect", "Some", "Ok", "Err",
4680 ];
4681 for cap in re.captures_iter(source) {
4682 let name = cap.get(1).unwrap().as_str().rsplit("::").next().unwrap();
4683 if keywords.contains(&name) {
4684 continue;
4685 }
4686 let line = line_for(source, cap.get(1).unwrap().start());
4687 if let Some(caller) = nodes
4688 .iter()
4689 .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
4690 .rev()
4691 .find(|n| n.start_line <= line)
4692 {
4693 refs.push(unresolved(
4694 &caller.id,
4695 name,
4696 EdgeKind::Calls,
4697 file_path,
4698 language,
4699 line,
4700 ));
4701 }
4702 }
4703}
4704
4705fn make_node(
4706 file_path: &str,
4707 language: Language,
4708 kind: NodeKind,
4709 name: &str,
4710 line: i64,
4711 col: i64,
4712 now: i64,
4713 signature: Option<String>,
4714) -> Node {
4715 Node {
4716 id: format!("{}:{}:{}:{}", kind.as_str(), file_path, name, line),
4717 kind,
4718 name: name.to_string(),
4719 qualified_name: name.to_string(),
4720 file_path: file_path.to_string(),
4721 language,
4722 start_line: line,
4723 end_line: line,
4724 start_column: col,
4725 end_column: col,
4726 docstring: None,
4727 signature,
4728 visibility: None,
4729 is_exported: false,
4730 is_async: false,
4731 is_static: false,
4732 is_abstract: false,
4733 updated_at: now,
4734 }
4735}
4736
4737fn make_node_span(
4738 file_path: &str,
4739 language: Language,
4740 kind: NodeKind,
4741 name: &str,
4742 node: SyntaxNode,
4743 now: i64,
4744 signature: Option<String>,
4745) -> Node {
4746 let start = node.start_position();
4747 let end = node.end_position();
4748 Node {
4749 id: format!("{}:{}:{}:{}", kind.as_str(), file_path, name, start.row + 1),
4750 kind,
4751 name: name.to_string(),
4752 qualified_name: name.to_string(),
4753 file_path: file_path.to_string(),
4754 language,
4755 start_line: start.row as i64 + 1,
4756 end_line: end.row as i64 + 1,
4757 start_column: start.column as i64,
4758 end_column: end.column as i64,
4759 docstring: None,
4760 signature,
4761 visibility: None,
4762 is_exported: false,
4763 is_async: false,
4764 is_static: false,
4765 is_abstract: false,
4766 updated_at: now,
4767 }
4768}
4769
4770fn add_contains(nodes: &[Node], edges: &mut Vec<Edge>, node: &Node) {
4771 if let Some(file) = nodes.first() {
4772 edges.push(Edge {
4773 id: None,
4774 source: file.id.clone(),
4775 target: node.id.clone(),
4776 kind: EdgeKind::Contains,
4777 line: None,
4778 col: None,
4779 provenance: Some("regex".into()),
4780 });
4781 }
4782}
4783
4784fn add_contains_from_stack(
4785 nodes: &[Node],
4786 edges: &mut Vec<Edge>,
4787 stack: &[String],
4788 node: &Node,
4789 provenance: &str,
4790) {
4791 let source = stack
4792 .last()
4793 .cloned()
4794 .or_else(|| nodes.first().map(|n| n.id.clone()));
4795 if let Some(source) = source {
4796 edges.push(Edge {
4797 id: None,
4798 source,
4799 target: node.id.clone(),
4800 kind: EdgeKind::Contains,
4801 line: None,
4802 col: None,
4803 provenance: Some(provenance.into()),
4804 });
4805 }
4806}
4807
4808fn unresolved(
4809 from: &str,
4810 name: &str,
4811 kind: EdgeKind,
4812 file_path: &str,
4813 language: Language,
4814 line: i64,
4815) -> UnresolvedReference {
4816 UnresolvedReference {
4817 from_node_id: from.to_string(),
4818 reference_name: name.to_string(),
4819 reference_kind: kind,
4820 line,
4821 column: 0,
4822 file_path: file_path.to_string(),
4823 language,
4824 }
4825}
4826
4827fn refs_push(
4828 refs: &mut Vec<UnresolvedReference>,
4829 from: &str,
4830 name: &str,
4831 kind: EdgeKind,
4832 file_path: &str,
4833 language: Language,
4834 line: i64,
4835 column: i64,
4836) {
4837 if !name.is_empty() {
4838 refs.push(UnresolvedReference {
4839 from_node_id: from.to_string(),
4840 reference_name: name.to_string(),
4841 reference_kind: kind,
4842 line,
4843 column,
4844 file_path: file_path.to_string(),
4845 language,
4846 });
4847 }
4848}
4849
4850fn named_children(node: SyntaxNode) -> Vec<SyntaxNode> {
4851 (0..node.named_child_count())
4852 .filter_map(|i| node.named_child(i as u32))
4853 .collect()
4854}
4855
4856fn node_text<'a>(node: SyntaxNode, source: &'a str) -> &'a str {
4857 source.get(node.byte_range()).unwrap_or_default()
4858}
4859
4860fn child_text_by_kind<'a>(node: SyntaxNode, source: &'a str, kinds: &[&str]) -> Option<&'a str> {
4861 named_children(node)
4862 .into_iter()
4863 .find(|child| kinds.contains(&child.kind()))
4864 .map(|child| node_text(child, source))
4865}
4866
4867fn descendant_text_by_kind<'a>(
4868 node: SyntaxNode,
4869 source: &'a str,
4870 kinds: &[&str],
4871) -> Option<&'a str> {
4872 if kinds.contains(&node.kind()) {
4873 return Some(node_text(node, source));
4874 }
4875 for child in named_children(node) {
4876 if let Some(text) = descendant_text_by_kind(child, source, kinds) {
4877 return Some(text);
4878 }
4879 }
4880 None
4881}
4882
4883fn rust_node_name(node: SyntaxNode, source: &str, kind: NodeKind) -> Option<String> {
4884 if kind == NodeKind::Import {
4885 return rust_import_root(node, source);
4886 }
4887 if kind == NodeKind::Variable && node.kind() == "let_declaration" {
4888 return descendant_text_by_kind(node, source, &["identifier"]).map(clean_symbol_name);
4889 }
4890 if kind == NodeKind::Field {
4891 return child_text_by_kind(node, source, &["field_identifier", "identifier"])
4892 .map(clean_symbol_name);
4893 }
4894 node.child_by_field_name("name")
4895 .map(|n| clean_symbol_name(node_text(n, source)))
4896 .or_else(|| {
4897 child_text_by_kind(
4898 node,
4899 source,
4900 &["identifier", "type_identifier", "field_identifier"],
4901 )
4902 .map(clean_symbol_name)
4903 })
4904}
4905
4906fn rust_is_public(node: SyntaxNode, source: &str) -> bool {
4907 node_text(node, source).trim_start().starts_with("pub")
4908 || named_children(node).into_iter().any(|child| {
4909 child.kind() == "visibility_modifier" && node_text(child, source).contains("pub")
4910 })
4911}
4912
4913fn rust_receiver_type(node: SyntaxNode, source: &str) -> Option<String> {
4914 let mut parent = node.parent();
4915 while let Some(p) = parent {
4916 if p.kind() == "impl_item" {
4917 let mut direct = named_children(p)
4918 .into_iter()
4919 .filter(|child| {
4920 matches!(
4921 child.kind(),
4922 "type_identifier" | "generic_type" | "scoped_type_identifier"
4923 )
4924 })
4925 .collect::<Vec<_>>();
4926 if let Some(last) = direct.pop() {
4927 return Some(clean_type_name(node_text(last, source)));
4928 }
4929 return descendant_text_by_kind(p, source, &["type_identifier"]).map(clean_type_name);
4930 }
4931 parent = p.parent();
4932 }
4933 None
4934}
4935
4936fn rust_impl_trait_for_type(node: SyntaxNode, source: &str) -> Option<(String, String)> {
4937 if node.kind() != "impl_item" || !node_text(node, source).contains(" for ") {
4938 return None;
4939 }
4940 let names: Vec<String> = named_children(node)
4941 .into_iter()
4942 .filter(|child| {
4943 matches!(
4944 child.kind(),
4945 "type_identifier" | "generic_type" | "scoped_type_identifier"
4946 )
4947 })
4948 .map(|child| clean_type_name(node_text(child, source)))
4949 .collect();
4950 if names.len() >= 2 {
4951 Some((names[0].clone(), names[names.len() - 1].clone()))
4952 } else {
4953 None
4954 }
4955}
4956
4957fn rust_import_root(node: SyntaxNode, source: &str) -> Option<String> {
4958 let text = node_text(node, source)
4959 .trim()
4960 .strip_prefix("use")
4961 .unwrap_or(node_text(node, source))
4962 .trim()
4963 .trim_end_matches(';')
4964 .trim();
4965 text.split("::")
4966 .next()
4967 .map(|s| s.trim_matches('{').trim().to_string())
4968 .filter(|s| !s.is_empty())
4969}
4970
4971fn callable_name(node: SyntaxNode, source: &str) -> Option<String> {
4972 match node.kind() {
4973 "identifier" | "field_identifier" => Some(clean_symbol_name(node_text(node, source))),
4974 "scoped_identifier" => node_text(node, source)
4975 .rsplit("::")
4976 .next()
4977 .map(clean_symbol_name),
4978 "field_expression" => node
4979 .child_by_field_name("field")
4980 .map(|field| clean_symbol_name(node_text(field, source))),
4981 "generic_function" => named_children(node)
4982 .into_iter()
4983 .find_map(|child| callable_name(child, source)),
4984 _ => None,
4985 }
4986}
4987
4988fn moonbit_node_name(node: SyntaxNode, source: &str, kind: NodeKind) -> Option<String> {
4989 match kind {
4990 NodeKind::Function | NodeKind::Method => child_text_by_kind(
4991 node,
4992 source,
4993 &["function_identifier", "lowercase_identifier", "identifier"],
4994 )
4995 .map(|s| clean_symbol_name(s.rsplit("::").next().unwrap_or(s))),
4996 NodeKind::Struct | NodeKind::Trait | NodeKind::Enum => child_text_by_kind(
4997 node,
4998 source,
4999 &[
5000 "identifier",
5001 "type_identifier",
5002 "type_name",
5003 "uppercase_identifier",
5004 ],
5005 )
5006 .map(clean_symbol_name),
5007 NodeKind::EnumMember => child_text_by_kind(
5008 node,
5009 source,
5010 &["uppercase_identifier", "identifier", "type_name"],
5011 )
5012 .map(clean_symbol_name),
5013 NodeKind::TypeAlias => descendant_text_by_kind(
5014 node,
5015 source,
5016 &[
5017 "type_identifier",
5018 "type_name",
5019 "identifier",
5020 "uppercase_identifier",
5021 ],
5022 )
5023 .map(clean_symbol_name),
5024 NodeKind::Constant => {
5025 child_text_by_kind(node, source, &["uppercase_identifier", "identifier"])
5026 .map(clean_symbol_name)
5027 }
5028 NodeKind::Import => moonbit_import_name(node, source),
5029 NodeKind::Module => node
5030 .named_child(0)
5031 .map(|child| clean_quoted(node_text(child, source))),
5032 _ => None,
5033 }
5034}
5035
5036fn moonbit_is_public(node: SyntaxNode, source: &str) -> bool {
5037 named_children(node)
5038 .into_iter()
5039 .any(|child| child.kind() == "visibility" && node_text(child, source).contains("pub"))
5040 || node_text(node, source).trim_start().starts_with("pub ")
5041}
5042
5043fn moonbit_impl_owner(node: SyntaxNode, source: &str) -> Option<String> {
5044 child_text_by_kind(
5045 node,
5046 source,
5047 &["type_name", "type_identifier", "qualified_type_identifier"],
5048 )
5049 .map(clean_type_name)
5050}
5051
5052fn moonbit_import_name(node: SyntaxNode, source: &str) -> Option<String> {
5053 if node.kind() == "import_declaration" {
5054 return named_children(node)
5055 .into_iter()
5056 .find(|child| child.kind() == "import_item")
5057 .and_then(|child| moonbit_import_name(child, source));
5058 }
5059 named_children(node)
5060 .into_iter()
5061 .find(|child| child.kind() == "string_literal")
5062 .map(|child| clean_quoted(node_text(child, source)))
5063}
5064
5065fn moonbit_call_name(node: SyntaxNode, source: &str) -> Option<String> {
5066 for child in named_children(node) {
5067 match child.kind() {
5068 "qualified_identifier" | "function_identifier" | "method_expression" => {
5069 let text = node_text(child, source);
5070 let name = text
5071 .rsplit(['.', ':'])
5072 .find(|part| !part.is_empty())
5073 .unwrap_or(text);
5074 return Some(clean_symbol_name(name));
5075 }
5076 "lowercase_identifier" | "identifier" => {
5077 return Some(clean_symbol_name(node_text(child, source)));
5078 }
5079 _ => {}
5080 }
5081 }
5082 None
5083}
5084
5085fn enclosing_callable(nodes: &[Node], line: i64) -> Option<&Node> {
5086 nodes
5087 .iter()
5088 .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
5089 .filter(|n| n.start_line <= line && line <= n.end_line.max(n.start_line))
5090 .min_by_key(|n| n.end_line - n.start_line)
5091}
5092
/// Normalizes a raw symbol token into a bare name.
///
/// Steps are applied in order: surrounding whitespace is trimmed, then
/// surrounding double quotes, then surrounding single quotes, and finally any
/// leading dots. Because the quote passes run one after the other, a double
/// quote nested inside single quotes is preserved.
fn clean_symbol_name(s: &str) -> String {
    let without_whitespace = s.trim();
    let without_double_quotes = without_whitespace.trim_matches('"');
    let without_quotes = without_double_quotes.trim_matches('\'');
    String::from(without_quotes.trim_start_matches('.'))
}
5100
/// Trims surrounding whitespace, then surrounding double quotes, then
/// surrounding single quotes from `s`, in that order, and returns the result
/// as an owned string.
fn clean_quoted(s: &str) -> String {
    let trimmed = s.trim();
    let without_double_quotes = trimmed.trim_matches('"');
    String::from(without_double_quotes.trim_matches('\''))
}
5104
/// Reduces a type expression to its bare name.
///
/// Everything from the first `<` onward (generic arguments) is dropped, then
/// only the text after the last `::` is kept, and the result is trimmed.
/// For example `a::b::C<T>` becomes `C`.
fn clean_type_name(s: &str) -> String {
    let trimmed = s.trim();
    let head = match trimmed.find('<') {
        Some(generics_start) => &trimmed[..generics_start],
        None => trimmed,
    };
    let last_segment = match head.rfind("::") {
        Some(separator) => &head[separator + 2..],
        None => head,
    };
    last_segment.trim().to_string()
}
5115
/// Returns the 1-based line number of byte offset `idx` within `source`.
///
/// The line number is one plus the count of `\n` bytes that occur before
/// `idx`. Offsets past the end of `source` are clamped to its length.
///
/// Counting is done on the raw bytes rather than on a `str` slice, so an
/// offset that falls inside a multi-byte UTF-8 character is handled instead of
/// panicking. This is safe because `\n` can never appear as part of a
/// multi-byte sequence.
fn line_for(source: &str, idx: usize) -> i64 {
    let end = idx.min(source.len());
    let newlines = source.as_bytes()[..end]
        .iter()
        .filter(|&&byte| byte == b'\n')
        .count();
    // Saturate rather than wrap in the (practically unreachable) overflow case.
    i64::try_from(newlines).map_or(i64::MAX, |count| count.saturating_add(1))
}
5123
/// Keeps only the contents of `mbt` fenced code blocks from a Markdown
/// document, blanking every other line so line numbers stay aligned with the
/// original.
///
/// Each input line produces exactly one output line terminated by `\n`:
/// * a fence line (starts with three backticks after leading whitespace) is
///   emitted empty and switches capture on if it contains `mbt`, off otherwise;
/// * a line inside an `mbt` fence is copied through;
/// * any other line is emitted empty.
fn extract_mbt_markdown_code_with_padding(source: &str) -> String {
    let mut padded = String::new();
    let mut inside_mbt_fence = false;
    for raw_line in source.lines() {
        let unindented = raw_line.trim_start();
        if unindented.starts_with("```") {
            inside_mbt_fence = unindented.contains("mbt");
        } else if inside_mbt_fence {
            padded.push_str(raw_line);
        }
        padded.push('\n');
    }
    padded
}
5141
/// Blanks out `//` line comments and `/* ... */` block comments while keeping
/// every newline, so line numbers in the result match `source`.
///
/// Each comment character is replaced by a single space (one space per `char`,
/// so character columns are preserved but byte offsets may shift for non-ASCII
/// comment text). Double-quoted strings are copied verbatim, honouring
/// backslash escapes, so comment markers inside strings are left alone.
///
/// A char literal that holds a double quote (`'"'` or `'\"'`) is also copied
/// verbatim; otherwise its quote would be mistaken for the start of a string
/// and throw off comment detection for the text that follows.
fn strip_moonbit_comments_preserve_lines(source: &str) -> String {
    let mut out = String::with_capacity(source.len());
    let mut chars = source.chars().peekable();
    let mut in_string = false;
    let mut escaped = false;
    while let Some(ch) = chars.next() {
        if in_string {
            out.push(ch);
            if escaped {
                // The character after a backslash never terminates the string.
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }

        if ch == '"' {
            in_string = true;
            out.push(ch);
            continue;
        }

        if ch == '\'' {
            // Look ahead without consuming to spot `'"'` / `'\"'`.
            let mut ahead = chars.clone();
            let lookahead = [ahead.next(), ahead.next(), ahead.next()];
            let literal_tail = match lookahead {
                [Some('"'), Some('\''), _] => 2,
                [Some('\\'), Some('"'), Some('\'')] => 3,
                _ => 0,
            };
            out.push(ch);
            out.extend(chars.by_ref().take(literal_tail));
            continue;
        }

        if ch == '/' && chars.peek() == Some(&'/') {
            chars.next();
            // Two spaces stand in for the `//` marker itself.
            out.push_str("  ");
            for next in chars.by_ref() {
                if next == '\n' {
                    out.push('\n');
                    break;
                }
                out.push(' ');
            }
            continue;
        }

        if ch == '/' && chars.peek() == Some(&'*') {
            chars.next();
            // Two spaces stand in for the `/*` marker itself.
            out.push_str("  ");
            // Starting from NUL means the `*` of the opener cannot pair with a
            // directly following `/` (so `/*/` does not close the comment).
            let mut prev = '\0';
            for next in chars.by_ref() {
                out.push(if next == '\n' { '\n' } else { ' ' });
                if prev == '*' && next == '/' {
                    break;
                }
                prev = next;
            }
            continue;
        }

        out.push(ch);
    }
    out
}
5203
/// Current wall-clock time as milliseconds since the Unix epoch.
///
/// Returns `0` if the system clock reports a time before the epoch.
fn now_ms() -> i64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as i64,
        Err(_) => 0,
    }
}