Skip to main content

codegraph/
extraction.rs

1use crate::config::CodeGraphConfig;
2use crate::types::*;
3use regex::Regex;
4use std::path::Path;
5use tree_sitter::{Node as SyntaxNode, Parser};
6
7type ExtractorFn = fn(
8    file_path: &str,
9    source: &str,
10    language: Language,
11    now: i64,
12    nodes: &mut Vec<Node>,
13    edges: &mut Vec<Edge>,
14    refs: &mut Vec<UnresolvedReference>,
15);
16
17#[derive(Clone, Copy)]
18struct LanguageExtractor {
19    name: &'static str,
20    languages: &'static [Language],
21    extract: ExtractorFn,
22}
23
24const RUST_LANGUAGES: &[Language] = &[Language::Rust];
25const MOONBIT_LANGUAGES: &[Language] = &[Language::MoonBit];
26const PYTHON_LANGUAGES: &[Language] = &[Language::Python];
27const GO_LANGUAGES: &[Language] = &[Language::Go];
28const JAVA_KOTLIN_LANGUAGES: &[Language] = &[Language::Java, Language::Kotlin];
29const CSHARP_LANGUAGES: &[Language] = &[Language::CSharp];
30const PHP_RUBY_LANGUAGES: &[Language] = &[Language::Php, Language::Ruby];
31const SWIFT_LANGUAGES: &[Language] = &[Language::Swift];
32const DART_PASCAL_SCALA_LANGUAGES: &[Language] =
33    &[Language::Dart, Language::Pascal, Language::Scala];
34const LIQUID_VUE_SVELTE_LANGUAGES: &[Language] =
35    &[Language::Liquid, Language::Vue, Language::Svelte];
36const TYPESCRIPT_JAVASCRIPT_LANGUAGES: &[Language] = &[
37    Language::TypeScript,
38    Language::Tsx,
39    Language::JavaScript,
40    Language::Jsx,
41];
42const GENERIC_LANGUAGES: &[Language] = &[Language::C, Language::Cpp, Language::Unknown];
43
44const LANGUAGE_EXTRACTORS: &[LanguageExtractor] = &[
45    LanguageExtractor {
46        name: "rust",
47        languages: RUST_LANGUAGES,
48        extract: extract_rust_entry,
49    },
50    LanguageExtractor {
51        name: "moonbit",
52        languages: MOONBIT_LANGUAGES,
53        extract: extract_moonbit_entry,
54    },
55    LanguageExtractor {
56        name: "typescript_javascript",
57        languages: TYPESCRIPT_JAVASCRIPT_LANGUAGES,
58        extract: extract_typescript_javascript_entry,
59    },
60    LanguageExtractor {
61        name: "python",
62        languages: PYTHON_LANGUAGES,
63        extract: extract_python_entry,
64    },
65    LanguageExtractor {
66        name: "go",
67        languages: GO_LANGUAGES,
68        extract: extract_go_entry,
69    },
70    LanguageExtractor {
71        name: "java_kotlin",
72        languages: JAVA_KOTLIN_LANGUAGES,
73        extract: extract_java_kotlin_entry,
74    },
75    LanguageExtractor {
76        name: "csharp",
77        languages: CSHARP_LANGUAGES,
78        extract: extract_csharp_entry,
79    },
80    LanguageExtractor {
81        name: "php_ruby",
82        languages: PHP_RUBY_LANGUAGES,
83        extract: extract_php_ruby_entry,
84    },
85    LanguageExtractor {
86        name: "swift",
87        languages: SWIFT_LANGUAGES,
88        extract: extract_swift_entry,
89    },
90    LanguageExtractor {
91        name: "dart_pascal_scala",
92        languages: DART_PASCAL_SCALA_LANGUAGES,
93        extract: extract_dart_pascal_scala_entry,
94    },
95    LanguageExtractor {
96        name: "liquid_vue_svelte",
97        languages: LIQUID_VUE_SVELTE_LANGUAGES,
98        extract: extract_liquid_vue_svelte_entry,
99    },
100    LanguageExtractor {
101        name: "generic",
102        languages: GENERIC_LANGUAGES,
103        extract: extract_generic_entry,
104    },
105];
106
107pub fn should_include_file(path: &Path, config: &CodeGraphConfig) -> bool {
108    let s = path.to_string_lossy().replace('\\', "/");
109    if s.starts_with(".codegraph/") {
110        return false;
111    }
112    if config.exclude.iter().any(|p| glob_match(p, &s)) {
113        return false;
114    }
115    config.include.iter().any(|p| glob_match(p, &s))
116}
117
/// Minimal glob matcher for the pattern shapes used by the include/exclude lists.
///
/// Supported forms, tried in this order:
/// * `**/*.ext` — any path ending in `.ext`;
/// * `**/dir/**` — `dir` itself, or any path in which `dir` appears as a run of whole
///   components followed by `/`;
/// * `**/name` — `name` itself, or any path whose trailing whole components are `name`;
/// * anything else — exact string equality.
///
/// `path` must use `/` separators. Matching is component-aware: `**/build/**` does not
/// match `rebuild/main.c`, and `**/Cargo.lock` does not match `NotCargo.lock`.
fn glob_match(pattern: &str, path: &str) -> bool {
    if let Some(ext) = pattern.strip_prefix("**/*.") {
        // Equivalent to `path.ends_with(".{ext}")` without building the string.
        return path
            .strip_suffix(ext)
            .is_some_and(|stem| stem.ends_with('.'));
    }

    if let Some(dir) = pattern
        .strip_prefix("**/")
        .and_then(|rest| rest.strip_suffix("/**"))
    {
        // `dir` at the very start of the path (or the whole path)…
        let at_root = path
            .strip_prefix(dir)
            .is_some_and(|rest| rest.is_empty() || rest.starts_with('/'));
        // …or somewhere in the middle, bounded by separators on both sides.
        return at_root || path.contains(&format!("/{dir}/"));
    }

    if let Some(name) = pattern.strip_prefix("**/") {
        // The text before `name` must be empty or end on a separator.
        return path
            .strip_suffix(name)
            .is_some_and(|head| head.is_empty() || head.ends_with('/'));
    }

    pattern == path
}
134
135pub fn detect_language(path: &Path, _source: &str) -> Language {
136    let name = path
137        .file_name()
138        .and_then(|s| s.to_str())
139        .unwrap_or_default()
140        .to_lowercase();
141    if name == "moon.mod.json" || name == "moon.pkg.json" || name == "moon.pkg" {
142        return Language::MoonBit;
143    }
144    if name.ends_with(".mbt.md") {
145        return Language::MoonBit;
146    }
147    match path
148        .extension()
149        .and_then(|s| s.to_str())
150        .unwrap_or_default()
151        .to_lowercase()
152        .as_str()
153    {
154        "ts" => Language::TypeScript,
155        "tsx" => Language::Tsx,
156        "js" | "mjs" | "cjs" => Language::JavaScript,
157        "jsx" => Language::Jsx,
158        "py" | "pyw" => Language::Python,
159        "go" => Language::Go,
160        "rs" => Language::Rust,
161        "java" => Language::Java,
162        "c" | "h" => Language::C,
163        "cpp" | "cc" | "cxx" | "hpp" | "hxx" => Language::Cpp,
164        "cs" => Language::CSharp,
165        "php" => Language::Php,
166        "rb" | "rake" => Language::Ruby,
167        "swift" => Language::Swift,
168        "kt" | "kts" => Language::Kotlin,
169        "dart" => Language::Dart,
170        "svelte" => Language::Svelte,
171        "vue" => Language::Vue,
172        "liquid" => Language::Liquid,
173        "pas" | "dpr" | "dpk" | "lpr" | "dfm" | "fmx" => Language::Pascal,
174        "scala" | "sc" => Language::Scala,
175        "mbt" | "mbti" => Language::MoonBit,
176        _ => Language::Unknown,
177    }
178}
179
180pub fn detect_parse_error(source: &str, language: Language) -> bool {
181    match language {
182        Language::Rust => tree_sitter_has_error(source, tree_sitter_rust::LANGUAGE.into()),
183        _ => false,
184    }
185}
186
187fn tree_sitter_has_error(source: &str, language: tree_sitter::Language) -> bool {
188    let mut parser = Parser::new();
189    if parser.set_language(&language).is_err() {
190        return false;
191    }
192    parser
193        .parse(source, None)
194        .map(|tree| tree.root_node().has_error())
195        .unwrap_or(false)
196}
197
198pub fn extract_from_source(path: &Path, source: &str, language: Language) -> ExtractionResult {
199    let file_path = path.to_string_lossy().replace('\\', "/");
200    let now = now_ms();
201    let mut nodes = vec![Node {
202        id: format!("file:{}", file_path),
203        kind: NodeKind::File,
204        name: path
205            .file_name()
206            .and_then(|s| s.to_str())
207            .unwrap_or(&file_path)
208            .to_string(),
209        qualified_name: file_path.clone(),
210        file_path: file_path.clone(),
211        language,
212        start_line: 1,
213        end_line: source.lines().count().max(1) as i64,
214        start_column: 0,
215        end_column: 0,
216        docstring: None,
217        signature: None,
218        visibility: None,
219        is_exported: false,
220        is_async: false,
221        is_static: false,
222        is_abstract: false,
223        updated_at: now,
224    }];
225    let mut edges = Vec::new();
226    let mut refs = Vec::new();
227
228    let extractor = extractor_for_language(language);
229    (extractor.extract)(
230        &file_path, &source, language, now, &mut nodes, &mut edges, &mut refs,
231    );
232
233    ExtractionResult {
234        nodes,
235        edges,
236        unresolved_references: refs,
237    }
238}
239
240pub fn registered_extractor_name(language: Language) -> &'static str {
241    extractor_for_language(language).name
242}
243
244fn extractor_for_language(language: Language) -> LanguageExtractor {
245    LANGUAGE_EXTRACTORS
246        .iter()
247        .copied()
248        .find(|extractor| extractor.languages.contains(&language))
249        .unwrap_or(LanguageExtractor {
250            name: "generic",
251            languages: &[],
252            extract: extract_generic_entry,
253        })
254}
255
256fn extract_rust_entry(
257    file_path: &str,
258    source: &str,
259    _language: Language,
260    now: i64,
261    nodes: &mut Vec<Node>,
262    edges: &mut Vec<Edge>,
263    refs: &mut Vec<UnresolvedReference>,
264) {
265    extract_rust(file_path, source, now, nodes, edges, refs);
266}
267
268fn extract_moonbit_entry(
269    file_path: &str,
270    source: &str,
271    _language: Language,
272    now: i64,
273    nodes: &mut Vec<Node>,
274    edges: &mut Vec<Edge>,
275    refs: &mut Vec<UnresolvedReference>,
276) {
277    extract_moonbit(file_path, source, now, nodes, edges, refs);
278}
279
280fn extract_typescript_javascript_entry(
281    file_path: &str,
282    source: &str,
283    language: Language,
284    now: i64,
285    nodes: &mut Vec<Node>,
286    edges: &mut Vec<Edge>,
287    refs: &mut Vec<UnresolvedReference>,
288) {
289    extract_typescript_javascript(file_path, source, language, now, nodes, edges, refs);
290}
291
292fn extract_python_entry(
293    file_path: &str,
294    source: &str,
295    _language: Language,
296    now: i64,
297    nodes: &mut Vec<Node>,
298    edges: &mut Vec<Edge>,
299    refs: &mut Vec<UnresolvedReference>,
300) {
301    extract_python(file_path, source, now, nodes, edges, refs);
302}
303
304fn extract_go_entry(
305    file_path: &str,
306    source: &str,
307    _language: Language,
308    now: i64,
309    nodes: &mut Vec<Node>,
310    edges: &mut Vec<Edge>,
311    refs: &mut Vec<UnresolvedReference>,
312) {
313    extract_go(file_path, source, now, nodes, edges, refs);
314}
315
316fn extract_java_kotlin_entry(
317    file_path: &str,
318    source: &str,
319    language: Language,
320    now: i64,
321    nodes: &mut Vec<Node>,
322    edges: &mut Vec<Edge>,
323    refs: &mut Vec<UnresolvedReference>,
324) {
325    extract_java_kotlin(file_path, source, language, now, nodes, edges, refs);
326}
327
328fn extract_csharp_entry(
329    file_path: &str,
330    source: &str,
331    _language: Language,
332    now: i64,
333    nodes: &mut Vec<Node>,
334    edges: &mut Vec<Edge>,
335    refs: &mut Vec<UnresolvedReference>,
336) {
337    extract_csharp(file_path, source, now, nodes, edges, refs);
338}
339
340fn extract_php_ruby_entry(
341    file_path: &str,
342    source: &str,
343    language: Language,
344    now: i64,
345    nodes: &mut Vec<Node>,
346    edges: &mut Vec<Edge>,
347    refs: &mut Vec<UnresolvedReference>,
348) {
349    extract_php_ruby(file_path, source, language, now, nodes, edges, refs);
350}
351
352fn extract_swift_entry(
353    file_path: &str,
354    source: &str,
355    _language: Language,
356    now: i64,
357    nodes: &mut Vec<Node>,
358    edges: &mut Vec<Edge>,
359    refs: &mut Vec<UnresolvedReference>,
360) {
361    extract_swift(file_path, source, now, nodes, edges, refs);
362}
363
364fn extract_dart_pascal_scala_entry(
365    file_path: &str,
366    source: &str,
367    language: Language,
368    now: i64,
369    nodes: &mut Vec<Node>,
370    edges: &mut Vec<Edge>,
371    refs: &mut Vec<UnresolvedReference>,
372) {
373    extract_dart_pascal_scala(file_path, source, language, now, nodes, edges, refs);
374}
375
376fn extract_liquid_vue_svelte_entry(
377    file_path: &str,
378    source: &str,
379    language: Language,
380    now: i64,
381    nodes: &mut Vec<Node>,
382    edges: &mut Vec<Edge>,
383    refs: &mut Vec<UnresolvedReference>,
384) {
385    extract_liquid_vue_svelte(file_path, source, language, now, nodes, edges, refs);
386}
387
388fn extract_generic_entry(
389    file_path: &str,
390    source: &str,
391    language: Language,
392    now: i64,
393    nodes: &mut Vec<Node>,
394    edges: &mut Vec<Edge>,
395    refs: &mut Vec<UnresolvedReference>,
396) {
397    extract_generic(file_path, source, language, now, nodes, edges, refs);
398}
399
400fn extract_typescript_javascript(
401    file_path: &str,
402    source: &str,
403    language: Language,
404    now: i64,
405    nodes: &mut Vec<Node>,
406    edges: &mut Vec<Edge>,
407    refs: &mut Vec<UnresolvedReference>,
408) {
409    add_regex_nodes(
410        file_path,
411        source,
412        language,
413        now,
414        nodes,
415        edges,
416        r"(?m)^\s*(export\s+)?(?:async\s+)?function\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*([^{;]*)",
417        NodeKind::Function,
418    );
419    add_regex_nodes(
420        file_path,
421        source,
422        language,
423        now,
424        nodes,
425        edges,
426        r"(?m)^\s*(export\s+)?class\s+([A-Za-z_$][A-Za-z0-9_$]*)",
427        NodeKind::Class,
428    );
429    add_regex_nodes(
430        file_path,
431        source,
432        language,
433        now,
434        nodes,
435        edges,
436        r"(?m)^\s*(export\s+)?interface\s+([A-Za-z_$][A-Za-z0-9_$]*)",
437        NodeKind::Interface,
438    );
439    add_regex_nodes(
440        file_path,
441        source,
442        language,
443        now,
444        nodes,
445        edges,
446        r"(?m)^\s*(export\s+)?type\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*=",
447        NodeKind::TypeAlias,
448    );
449    add_ts_js_arrow_functions(file_path, source, language, now, nodes, edges);
450    add_ts_js_imports(file_path, source, language, now, nodes, edges, refs);
451    add_tsx_jsx_components(file_path, language, now, nodes, edges);
452    extract_web_file_routes(file_path, language, now, nodes, edges, refs);
453    extract_ts_js_framework_routes(file_path, source, language, now, nodes, edges, refs);
454    add_call_refs(
455        file_path,
456        source,
457        language,
458        nodes,
459        refs,
460        r"([A-Za-z_$][A-Za-z0-9_$.]*)\s*\(",
461    );
462}
463
464fn extract_ts_js_framework_routes(
465    file_path: &str,
466    source: &str,
467    language: Language,
468    now: i64,
469    nodes: &mut Vec<Node>,
470    edges: &mut Vec<Edge>,
471    refs: &mut Vec<UnresolvedReference>,
472) {
473    let express_re = Regex::new(
474        r#"(?:app|router|server)\.(get|post|put|patch|delete|all)\s*\(\s*['"`]([^'"`]+)['"`]\s*,\s*(?:[A-Za-z_$][A-Za-z0-9_$]*\s*,\s*)*([A-Za-z_$][A-Za-z0-9_$]*)"#,
475    )
476    .unwrap();
477    for cap in express_re.captures_iter(source) {
478        let method = cap.get(1).unwrap().as_str().to_ascii_uppercase();
479        let path_match = cap.get(2).unwrap();
480        let handler = cap.get(3).map(|m| m.as_str());
481        add_framework_route_node(
482            file_path,
483            language,
484            now,
485            nodes,
486            edges,
487            refs,
488            &method,
489            path_match.as_str(),
490            handler,
491            line_for(source, path_match.start()),
492            Some(cap.get(0).unwrap().as_str().trim().to_string()),
493            "web-framework",
494        );
495    }
496
497    let react_router_re = Regex::new(
498        r#"<Route\b[^>]*\bpath\s*=\s*["']([^"']+)["'][^>]*(?:\belement\s*=\s*\{\s*<\s*([A-Z][A-Za-z0-9_$]*)|\bComponent\s*=\s*\{\s*([A-Z][A-Za-z0-9_$]*))"#,
499    )
500    .unwrap();
501    for cap in react_router_re.captures_iter(source) {
502        let path_match = cap.get(1).unwrap();
503        let handler = cap.get(2).or_else(|| cap.get(3)).map(|m| m.as_str());
504        add_framework_route_node(
505            file_path,
506            language,
507            now,
508            nodes,
509            edges,
510            refs,
511            "PAGE",
512            path_match.as_str(),
513            handler,
514            line_for(source, path_match.start()),
515            Some(cap.get(0).unwrap().as_str().trim().to_string()),
516            "web-framework",
517        );
518    }
519}
520
521fn add_ts_js_arrow_functions(
522    file_path: &str,
523    source: &str,
524    language: Language,
525    now: i64,
526    nodes: &mut Vec<Node>,
527    edges: &mut Vec<Edge>,
528) {
529    let re = Regex::new(
530        r"(?m)^\s*(export\s+)?const\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*(?::[^=]+)?=\s*(?:async\s+)?(?:\([^)]*\)|[A-Za-z_$][A-Za-z0-9_$]*)(?:\s*:\s*[^=;\n]+)?\s*=>",
531    )
532    .unwrap();
533    for cap in re.captures_iter(source) {
534        let name_match = cap.get(2).unwrap();
535        let mut node = make_node(
536            file_path,
537            language,
538            NodeKind::Function,
539            name_match.as_str(),
540            line_for(source, name_match.start()),
541            0,
542            now,
543            cap.get(0).map(|m| m.as_str().trim().to_string()),
544        );
545        node.is_exported = cap.get(1).is_some();
546        node.visibility = node.is_exported.then(|| "public".to_string());
547        node.is_async = cap
548            .get(0)
549            .map(|m| m.as_str().contains("async"))
550            .unwrap_or(false);
551        add_contains(nodes, edges, &node);
552        nodes.push(node);
553    }
554}
555
556fn add_ts_js_imports(
557    file_path: &str,
558    source: &str,
559    language: Language,
560    now: i64,
561    nodes: &mut Vec<Node>,
562    edges: &mut Vec<Edge>,
563    refs: &mut Vec<UnresolvedReference>,
564) {
565    let re =
566        Regex::new(r#"(?m)^\s*import(?:\s+type)?(?:\s+[^;\n]*?\s+from)?\s+['"]([^'"]+)['"]\s*;?"#)
567            .unwrap();
568    for cap in re.captures_iter(source) {
569        let module = cap.get(1).unwrap();
570        let signature = cap.get(0).unwrap().as_str().trim().to_string();
571        let node = make_node(
572            file_path,
573            language,
574            NodeKind::Import,
575            module.as_str(),
576            line_for(source, module.start()),
577            0,
578            now,
579            Some(signature),
580        );
581        add_contains(nodes, edges, &node);
582        refs.push(unresolved(
583            &nodes[0].id,
584            module.as_str(),
585            EdgeKind::Imports,
586            file_path,
587            language,
588            node.start_line,
589        ));
590        nodes.push(node);
591    }
592}
593
594fn add_tsx_jsx_components(
595    file_path: &str,
596    language: Language,
597    now: i64,
598    nodes: &mut Vec<Node>,
599    edges: &mut Vec<Edge>,
600) {
601    if !matches!(language, Language::Tsx | Language::Jsx) {
602        return;
603    }
604    let component_names: Vec<(String, i64, bool, Option<String>)> = nodes
605        .iter()
606        .filter(|node| matches!(node.kind, NodeKind::Function | NodeKind::Class))
607        .filter(|node| node.name.chars().next().is_some_and(char::is_uppercase))
608        .map(|node| {
609            (
610                node.name.clone(),
611                node.start_line,
612                node.is_exported,
613                node.signature.clone(),
614            )
615        })
616        .collect();
617    for (name, line, is_exported, signature) in component_names {
618        let mut node = make_node(
619            file_path,
620            language,
621            NodeKind::Component,
622            &name,
623            line,
624            0,
625            now,
626            signature,
627        );
628        node.is_exported = is_exported;
629        node.visibility = node.is_exported.then(|| "public".to_string());
630        add_contains(nodes, edges, &node);
631        nodes.push(node);
632    }
633}
634
635fn extract_python(
636    file_path: &str,
637    source: &str,
638    now: i64,
639    nodes: &mut Vec<Node>,
640    edges: &mut Vec<Edge>,
641    refs: &mut Vec<UnresolvedReference>,
642) {
643    let class_re =
644        Regex::new(r"^([ \t]*)class\s+([A-Za-z_][A-Za-z0-9_]*)(?:\s*\([^)]*\))?\s*:").unwrap();
645    let def_re = Regex::new(
646        r"^([ \t]*)(async\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)\s*\([^)]*\)\s*(?:->\s*[^:]+)?\s*:",
647    )
648    .unwrap();
649    let decorator_re = Regex::new(r"^([ \t]*)@([A-Za-z_][A-Za-z0-9_\.]*(?:\([^)]*\))?)").unwrap();
650
651    let mut class_stack: Vec<(usize, String, String)> = Vec::new();
652    let mut pending_decorators: Vec<(usize, String, i64)> = Vec::new();
653
654    for (line_idx, line) in source.lines().enumerate() {
655        let line_no = line_idx as i64 + 1;
656        let trimmed = line.trim();
657        if trimmed.is_empty() || trimmed.starts_with('#') {
658            continue;
659        }
660
661        let indent = python_indent_width(line);
662        while class_stack.last().is_some_and(|(class_indent, _, _)| {
663            indent <= *class_indent && !trimmed.starts_with('@')
664        }) {
665            class_stack.pop();
666        }
667        pending_decorators.retain(|(decorator_indent, _, _)| *decorator_indent == indent);
668
669        if let Some(cap) = decorator_re.captures(line) {
670            pending_decorators.push((indent, cap[2].to_string(), line_no));
671            continue;
672        }
673
674        if let Some(cap) = class_re.captures(line) {
675            let name = cap[2].to_string();
676            let node = make_node(
677                file_path,
678                Language::Python,
679                NodeKind::Class,
680                &name,
681                line_no,
682                indent as i64,
683                now,
684                Some(trimmed.to_string()),
685            );
686            add_contains(nodes, edges, &node);
687            class_stack.push((indent, name, node.id.clone()));
688            nodes.push(node);
689            pending_decorators.clear();
690            continue;
691        }
692
693        if let Some(cap) = def_re.captures(line) {
694            let name = cap[3].to_string();
695            for (_, decorator, decorator_line) in &pending_decorators {
696                if let Some((method, path)) = python_route_decorator(decorator) {
697                    add_framework_route_node(
698                        file_path,
699                        Language::Python,
700                        now,
701                        nodes,
702                        edges,
703                        refs,
704                        method,
705                        path,
706                        Some(&name),
707                        *decorator_line,
708                        Some(format!("@{decorator}")),
709                        "web-framework",
710                    );
711                }
712            }
713            let parent_class = class_stack
714                .iter()
715                .rev()
716                .find(|(class_indent, _, _)| indent > *class_indent);
717            let kind = if parent_class.is_some() {
718                NodeKind::Method
719            } else {
720                NodeKind::Function
721            };
722            let mut signature_lines: Vec<String> = pending_decorators
723                .iter()
724                .map(|(_, decorator, _)| format!("@{}", decorator))
725                .collect();
726            signature_lines.push(trimmed.to_string());
727            let mut node = make_node(
728                file_path,
729                Language::Python,
730                kind,
731                &name,
732                line_no,
733                indent as i64,
734                now,
735                Some(signature_lines.join("\n")),
736            );
737            node.is_async = cap.get(2).is_some();
738            node.is_static = pending_decorators
739                .iter()
740                .any(|(_, decorator, _)| decorator == "staticmethod");
741            if let Some((_, class_name, class_id)) = parent_class {
742                node.qualified_name = format!("{}.{}", class_name, name);
743                edges.push(Edge {
744                    id: None,
745                    source: class_id.clone(),
746                    target: node.id.clone(),
747                    kind: EdgeKind::Contains,
748                    line: None,
749                    col: None,
750                    provenance: Some("python".into()),
751                });
752            } else {
753                add_contains(nodes, edges, &node);
754            }
755
756            for (_, decorator, decorator_line) in &pending_decorators {
757                let decorator_name = python_decorator_reference_name(decorator);
758                refs_push(
759                    refs,
760                    &node.id,
761                    &decorator_name,
762                    EdgeKind::Decorates,
763                    file_path,
764                    Language::Python,
765                    *decorator_line,
766                    0,
767                );
768            }
769            nodes.push(node);
770            pending_decorators.clear();
771            continue;
772        }
773
774        extract_python_imports(file_path, line, line_no, now, nodes, edges, refs);
775        pending_decorators.clear();
776    }
777
778    add_call_refs(
779        file_path,
780        source,
781        Language::Python,
782        nodes,
783        refs,
784        r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(",
785    );
786}
787
788fn python_route_decorator(decorator: &str) -> Option<(&'static str, &str)> {
789    let open = decorator.find('(')?;
790    let callee = decorator[..open].trim();
791    let args = &decorator[open + 1..];
792    let method = if callee.ends_with(".get") {
793        "GET"
794    } else if callee.ends_with(".post") {
795        "POST"
796    } else if callee.ends_with(".put") {
797        "PUT"
798    } else if callee.ends_with(".patch") {
799        "PATCH"
800    } else if callee.ends_with(".delete") {
801        "DELETE"
802    } else if callee.ends_with(".route") || callee.ends_with(".api_route") {
803        "ROUTE"
804    } else {
805        return None;
806    };
807    first_quoted_arg(args).map(|path| (method, path))
808}
809
/// Returns the name a decorator refers to: everything before the first `(`, trimmed.
///
/// Decorators without a call (no `(`) are returned whole, trimmed.
fn python_decorator_reference_name(decorator: &str) -> String {
    let callee = match decorator.find('(') {
        Some(paren) => &decorator[..paren],
        None => decorator,
    };
    callee.trim().to_owned()
}
818
819fn extract_python_imports(
820    file_path: &str,
821    line: &str,
822    line_no: i64,
823    now: i64,
824    nodes: &mut Vec<Node>,
825    edges: &mut Vec<Edge>,
826    refs: &mut Vec<UnresolvedReference>,
827) {
828    let import_re = Regex::new(r"^\s*import\s+(.+)$").unwrap();
829    let from_re = Regex::new(r"^\s*from\s+([A-Za-z_\.][A-Za-z0-9_\.]*)\s+import\s+(.+)$").unwrap();
830    let Some(file_id) = nodes.first().map(|node| node.id.clone()) else {
831        return;
832    };
833
834    if let Some(cap) = import_re.captures(line) {
835        for spec in cap[1]
836            .split(',')
837            .map(str::trim)
838            .filter(|spec| !spec.is_empty())
839        {
840            let module = spec.split_whitespace().next().unwrap_or(spec);
841            add_python_import_node(
842                file_path,
843                module,
844                line.trim(),
845                line_no,
846                now,
847                nodes,
848                edges,
849                refs,
850                &file_id,
851            );
852        }
853        return;
854    }
855
856    if let Some(cap) = from_re.captures(line) {
857        let module = cap[1].trim();
858        add_python_import_node(
859            file_path,
860            module,
861            line.trim(),
862            line_no,
863            now,
864            nodes,
865            edges,
866            refs,
867            &file_id,
868        );
869    }
870}
871
872#[allow(clippy::too_many_arguments)]
873fn add_python_import_node(
874    file_path: &str,
875    module: &str,
876    signature: &str,
877    line_no: i64,
878    now: i64,
879    nodes: &mut Vec<Node>,
880    edges: &mut Vec<Edge>,
881    refs: &mut Vec<UnresolvedReference>,
882    file_id: &str,
883) {
884    let node = make_node(
885        file_path,
886        Language::Python,
887        NodeKind::Import,
888        module,
889        line_no,
890        0,
891        now,
892        Some(signature.to_string()),
893    );
894    edges.push(Edge {
895        id: None,
896        source: file_id.to_string(),
897        target: node.id.clone(),
898        kind: EdgeKind::Contains,
899        line: None,
900        col: None,
901        provenance: Some("python".into()),
902    });
903    refs_push(
904        refs,
905        file_id,
906        module,
907        EdgeKind::Imports,
908        file_path,
909        Language::Python,
910        line_no,
911        0,
912    );
913    nodes.push(node);
914}
915
/// Measures the leading indentation of `line`, counting each space as 1 and each tab as 4.
///
/// Counting stops at the first character that is neither a space nor a tab.
fn python_indent_width(line: &str) -> usize {
    let mut width = 0;
    // Space and tab are single-byte in UTF-8, so scanning bytes is equivalent to chars here.
    for byte in line.bytes() {
        match byte {
            b' ' => width += 1,
            b'\t' => width += 4,
            _ => break,
        }
    }
    width
}
922
923fn extract_go(
924    file_path: &str,
925    source: &str,
926    now: i64,
927    nodes: &mut Vec<Node>,
928    edges: &mut Vec<Edge>,
929    refs: &mut Vec<UnresolvedReference>,
930) {
931    let package_re = Regex::new(r"(?m)^\s*package\s+([A-Za-z_][A-Za-z0-9_]*)").unwrap();
932    if let Some(cap) = package_re.captures(source) {
933        let package = cap.get(1).unwrap();
934        let node = make_node(
935            file_path,
936            Language::Go,
937            NodeKind::Module,
938            package.as_str(),
939            line_for(source, package.start()),
940            0,
941            now,
942            cap.get(0).map(|m| m.as_str().trim().to_string()),
943        );
944        add_contains(nodes, edges, &node);
945        nodes.push(node);
946    }
947
948    add_go_imports(file_path, source, now, nodes, edges, refs);
949    add_go_types(file_path, source, now, nodes, edges);
950    add_go_functions(file_path, source, now, nodes, edges);
951    add_go_call_refs(file_path, source, nodes, refs);
952}
953
954fn add_go_functions(
955    file_path: &str,
956    source: &str,
957    now: i64,
958    nodes: &mut Vec<Node>,
959    edges: &mut Vec<Edge>,
960) {
961    let method_re = Regex::new(
962        r"(?m)^\s*func\s+\(\s*(?:[A-Za-z_][A-Za-z0-9_]*\s+)?\*?\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)\s*([A-Za-z_][A-Za-z0-9_]*)\s*(\([^)]*\)\s*(?:\([^)]*\)|[A-Za-z_][A-Za-z0-9_\.\[\]]*)?)",
963    )
964    .unwrap();
965    for cap in method_re.captures_iter(source) {
966        let receiver = cap.get(1).unwrap().as_str();
967        let name = cap.get(2).unwrap().as_str();
968        let signature = cap.get(0).map(|m| m.as_str().trim().to_string());
969        let mut node = make_node(
970            file_path,
971            Language::Go,
972            NodeKind::Method,
973            name,
974            line_for(source, cap.get(2).unwrap().start()),
975            0,
976            now,
977            signature,
978        );
979        node.qualified_name = format!("{}.{}", receiver, name);
980        add_contains(nodes, edges, &node);
981        nodes.push(node);
982    }
983
984    let function_re = Regex::new(
985        r"(?m)^\s*func\s+([A-Za-z_][A-Za-z0-9_]*)\s*(\([^)]*\)\s*(?:\([^)]*\)|[A-Za-z_][A-Za-z0-9_\.\[\]]*)?)",
986    )
987    .unwrap();
988    for cap in function_re.captures_iter(source) {
989        let name = cap.get(1).unwrap().as_str();
990        let signature = cap.get(0).map(|m| m.as_str().trim().to_string());
991        let node = make_node(
992            file_path,
993            Language::Go,
994            NodeKind::Function,
995            name,
996            line_for(source, cap.get(1).unwrap().start()),
997            0,
998            now,
999            signature,
1000        );
1001        add_contains(nodes, edges, &node);
1002        nodes.push(node);
1003    }
1004}
1005
1006fn add_go_types(
1007    file_path: &str,
1008    source: &str,
1009    now: i64,
1010    nodes: &mut Vec<Node>,
1011    edges: &mut Vec<Edge>,
1012) {
1013    let type_re =
1014        Regex::new(r"(?m)^\s*type\s+([A-Za-z_][A-Za-z0-9_]*)\s+(struct|interface)\s*\{").unwrap();
1015    for cap in type_re.captures_iter(source) {
1016        let kind = match cap.get(2).unwrap().as_str() {
1017            "struct" => NodeKind::Struct,
1018            "interface" => NodeKind::Interface,
1019            _ => continue,
1020        };
1021        let name = cap.get(1).unwrap();
1022        let node = make_node(
1023            file_path,
1024            Language::Go,
1025            kind,
1026            name.as_str(),
1027            line_for(source, name.start()),
1028            0,
1029            now,
1030            cap.get(0).map(|m| m.as_str().trim().to_string()),
1031        );
1032        add_contains(nodes, edges, &node);
1033        nodes.push(node);
1034    }
1035}
1036
1037fn add_go_imports(
1038    file_path: &str,
1039    source: &str,
1040    now: i64,
1041    nodes: &mut Vec<Node>,
1042    edges: &mut Vec<Edge>,
1043    refs: &mut Vec<UnresolvedReference>,
1044) {
1045    let single_re =
1046        Regex::new(r#"(?m)^\s*import\s+(?:(\.|_|[A-Za-z_][A-Za-z0-9_]*)\s+)?"([^"]+)""#).unwrap();
1047    for cap in single_re.captures_iter(source) {
1048        let module = cap.get(2).unwrap();
1049        add_go_import_node(
1050            file_path,
1051            module.as_str(),
1052            cap.get(0).unwrap().as_str().trim(),
1053            line_for(source, module.start()),
1054            now,
1055            nodes,
1056            edges,
1057            refs,
1058        );
1059    }
1060
1061    let block_re = Regex::new(r#"(?ms)^\s*import\s*\((?P<body>.*?)\)"#).unwrap();
1062    let item_re = Regex::new(r#"(?m)^\s*(?:(\.|_|[A-Za-z_][A-Za-z0-9_]*)\s+)?"([^"]+)""#).unwrap();
1063    for block in block_re.captures_iter(source) {
1064        let Some(body) = block.name("body") else {
1065            continue;
1066        };
1067        for cap in item_re.captures_iter(body.as_str()) {
1068            let module = cap.get(2).unwrap();
1069            let absolute_module_start = body.start() + module.start();
1070            add_go_import_node(
1071                file_path,
1072                module.as_str(),
1073                cap.get(0).unwrap().as_str().trim(),
1074                line_for(source, absolute_module_start),
1075                now,
1076                nodes,
1077                edges,
1078                refs,
1079            );
1080        }
1081    }
1082}
1083
1084#[allow(clippy::too_many_arguments)]
1085fn add_go_import_node(
1086    file_path: &str,
1087    module: &str,
1088    signature: &str,
1089    line: i64,
1090    now: i64,
1091    nodes: &mut Vec<Node>,
1092    edges: &mut Vec<Edge>,
1093    refs: &mut Vec<UnresolvedReference>,
1094) {
1095    let Some(file_id) = nodes.first().map(|node| node.id.clone()) else {
1096        return;
1097    };
1098    let node = make_node(
1099        file_path,
1100        Language::Go,
1101        NodeKind::Import,
1102        module,
1103        line,
1104        0,
1105        now,
1106        Some(signature.to_string()),
1107    );
1108    add_contains(nodes, edges, &node);
1109    refs_push(
1110        refs,
1111        &file_id,
1112        module,
1113        EdgeKind::Imports,
1114        file_path,
1115        Language::Go,
1116        line,
1117        0,
1118    );
1119    nodes.push(node);
1120}
1121
1122fn add_go_call_refs(
1123    file_path: &str,
1124    source: &str,
1125    nodes: &[Node],
1126    refs: &mut Vec<UnresolvedReference>,
1127) {
1128    let call_re = Regex::new(r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(").unwrap();
1129    let keywords = [
1130        "append", "cap", "close", "complex", "copy", "delete", "func", "if", "imag", "len", "make",
1131        "new", "panic", "print", "println", "real", "recover", "return", "switch",
1132    ];
1133    for cap in call_re.captures_iter(source) {
1134        let name_match = cap.get(1).unwrap();
1135        let name = name_match.as_str();
1136        let line = line_for(source, name_match.start());
1137        let line_text = source
1138            .lines()
1139            .nth(line.saturating_sub(1) as usize)
1140            .unwrap_or_default()
1141            .trim_start();
1142        if keywords.contains(&name)
1143            || line_text.starts_with("func ")
1144            || line_text.starts_with("type ")
1145        {
1146            continue;
1147        }
1148        if let Some(caller) = nodes
1149            .iter()
1150            .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
1151            .rev()
1152            .find(|n| n.start_line <= line)
1153        {
1154            refs_push(
1155                refs,
1156                &caller.id,
1157                name,
1158                EdgeKind::Calls,
1159                file_path,
1160                Language::Go,
1161                line,
1162                0,
1163            );
1164        }
1165    }
1166}
1167
1168fn extract_java_kotlin(
1169    file_path: &str,
1170    source: &str,
1171    language: Language,
1172    now: i64,
1173    nodes: &mut Vec<Node>,
1174    edges: &mut Vec<Edge>,
1175    refs: &mut Vec<UnresolvedReference>,
1176) {
1177    add_java_kotlin_imports(file_path, source, language, now, nodes, edges, refs);
1178    add_java_kotlin_types_and_members(file_path, source, language, now, nodes, edges, refs);
1179    add_call_refs(
1180        file_path,
1181        source,
1182        language,
1183        nodes,
1184        refs,
1185        r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(",
1186    );
1187}
1188
1189fn add_java_kotlin_imports(
1190    file_path: &str,
1191    source: &str,
1192    language: Language,
1193    now: i64,
1194    nodes: &mut Vec<Node>,
1195    edges: &mut Vec<Edge>,
1196    refs: &mut Vec<UnresolvedReference>,
1197) {
1198    let re =
1199        Regex::new(r"(?m)^\s*import\s+(?:static\s+)?([A-Za-z_][A-Za-z0-9_\.\*]*)\s*;?").unwrap();
1200    for cap in re.captures_iter(source) {
1201        let module = cap.get(1).unwrap();
1202        let node = make_node(
1203            file_path,
1204            language,
1205            NodeKind::Import,
1206            module.as_str(),
1207            line_for(source, module.start()),
1208            0,
1209            now,
1210            cap.get(0).map(|m| m.as_str().trim().to_string()),
1211        );
1212        add_contains(nodes, edges, &node);
1213        refs_push(
1214            refs,
1215            &nodes[0].id,
1216            module.as_str(),
1217            EdgeKind::Imports,
1218            file_path,
1219            language,
1220            node.start_line,
1221            0,
1222        );
1223        nodes.push(node);
1224    }
1225}
1226
1227fn add_java_kotlin_types_and_members(
1228    file_path: &str,
1229    source: &str,
1230    language: Language,
1231    now: i64,
1232    nodes: &mut Vec<Node>,
1233    edges: &mut Vec<Edge>,
1234    refs: &mut Vec<UnresolvedReference>,
1235) {
1236    let type_re = Regex::new(
1237        r"^\s*(?:(public|private|protected|internal)\s+)?(?:(?:data|abstract|open|final|sealed|enum)\s+)*(class|interface|enum)\s+([A-Za-z_][A-Za-z0-9_]*)([^{]*)\{?",
1238    )
1239    .unwrap();
1240    let java_method_re = Regex::new(
1241        r"^\s*(?:(public|private|protected)\s+)?((?:static|final|abstract|synchronized)\s+)*(?:[A-Za-z_][A-Za-z0-9_<>,\.\?\[\]\s]*\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\([^;{}]*\)\s*(?:throws\s+[A-Za-z0-9_,\.\s]+)?\{?",
1242    )
1243    .unwrap();
1244    let kotlin_fun_re = Regex::new(
1245        r"^\s*(?:(public|private|protected|internal)\s+)?((?:suspend|inline|open|override|private|public|protected|internal)\s+)*fun\s+(?:(?P<receiver>[A-Za-z_][A-Za-z0-9_\.]*)\.)?(?P<name>[A-Za-z_][A-Za-z0-9_]*)\s*\([^)]*\)\s*(?::\s*[A-Za-z_][A-Za-z0-9_<>,\.\?\s]*)?",
1246    )
1247    .unwrap();
1248    let annotation_re = Regex::new(r"^\s*@([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
1249    let mut type_stack: Vec<(usize, String, String)> = Vec::new();
1250    let mut pending_annotations: Vec<(String, i64)> = Vec::new();
1251
1252    for (idx, line) in source.lines().enumerate() {
1253        let line_no = idx as i64 + 1;
1254        let trimmed = line.trim();
1255        if trimmed.is_empty() || trimmed.starts_with("//") {
1256            continue;
1257        }
1258        let indent = python_indent_width(line);
1259        while type_stack
1260            .last()
1261            .is_some_and(|(type_indent, _, _)| indent <= *type_indent)
1262        {
1263            type_stack.pop();
1264        }
1265
1266        if let Some(cap) = annotation_re.captures(line) {
1267            pending_annotations.push((cap[1].to_string(), line_no));
1268            continue;
1269        }
1270
1271        if let Some(cap) = type_re.captures(line) {
1272            let keyword = cap.get(2).unwrap().as_str();
1273            let kind = match keyword {
1274                "interface" => NodeKind::Interface,
1275                "enum" => NodeKind::Enum,
1276                _ => NodeKind::Class,
1277            };
1278            let name = cap.get(3).unwrap();
1279            let mut node = make_node(
1280                file_path,
1281                language,
1282                kind,
1283                name.as_str(),
1284                line_no,
1285                indent as i64,
1286                now,
1287                Some(java_kotlin_signature(&pending_annotations, trimmed)),
1288            );
1289            node.visibility = cap.get(1).map(|m| m.as_str().to_string());
1290            node.is_exported = node
1291                .visibility
1292                .as_deref()
1293                .map(|visibility| visibility == "public")
1294                .unwrap_or(language == Language::Kotlin);
1295            add_contains(nodes, edges, &node);
1296            add_java_kotlin_metadata_refs(
1297                &node.id,
1298                cap.get(4).map(|m| m.as_str()).unwrap_or_default(),
1299                &pending_annotations,
1300                file_path,
1301                language,
1302                line_no,
1303                refs,
1304            );
1305            if !trimmed.contains('}') {
1306                type_stack.push((indent, name.as_str().to_string(), node.id.clone()));
1307            }
1308            nodes.push(node);
1309            pending_annotations.clear();
1310            continue;
1311        }
1312
1313        let member = match language {
1314            Language::Kotlin => kotlin_fun_re.captures(line).map(|cap| {
1315                (
1316                    cap.name("name").unwrap().as_str().to_string(),
1317                    cap.name("receiver").map(|m| m.as_str().to_string()),
1318                    cap.get(1).map(|m| m.as_str().to_string()),
1319                    cap.get(2)
1320                        .map(|m| m.as_str().contains("suspend"))
1321                        .unwrap_or(false),
1322                    cap.get(0).unwrap().as_str().trim().to_string(),
1323                )
1324            }),
1325            _ => java_method_re.captures(line).and_then(|cap| {
1326                let name = cap.get(3).unwrap().as_str();
1327                let skip = matches!(
1328                    name,
1329                    "if" | "for" | "while" | "switch" | "catch" | "return" | "new"
1330                );
1331                (!skip).then(|| {
1332                    (
1333                        name.to_string(),
1334                        None,
1335                        cap.get(1).map(|m| m.as_str().to_string()),
1336                        false,
1337                        cap.get(0).unwrap().as_str().trim().to_string(),
1338                    )
1339                })
1340            }),
1341        };
1342        if let Some((name, receiver, visibility, is_async, signature)) = member {
1343            let kind = if type_stack.is_empty() && language == Language::Kotlin {
1344                NodeKind::Function
1345            } else {
1346                NodeKind::Method
1347            };
1348            let mut node = make_node(
1349                file_path,
1350                language,
1351                kind,
1352                &name,
1353                line_no,
1354                indent as i64,
1355                now,
1356                Some(java_kotlin_signature(&pending_annotations, &signature)),
1357            );
1358            node.visibility = visibility;
1359            node.is_exported = node
1360                .visibility
1361                .as_deref()
1362                .map(|visibility| visibility == "public")
1363                .unwrap_or(language == Language::Kotlin);
1364            node.is_async = is_async;
1365            node.is_static = signature.contains(" static ");
1366            if let Some(receiver) =
1367                receiver.or_else(|| type_stack.last().map(|(_, name, _)| name.clone()))
1368            {
1369                node.qualified_name = format!("{}.{}", receiver, name);
1370            }
1371            if let Some((_, _, parent_id)) = type_stack.last() {
1372                edges.push(Edge {
1373                    id: None,
1374                    source: parent_id.clone(),
1375                    target: node.id.clone(),
1376                    kind: EdgeKind::Contains,
1377                    line: None,
1378                    col: None,
1379                    provenance: Some(language.as_str().into()),
1380                });
1381            } else {
1382                add_contains(nodes, edges, &node);
1383            }
1384            for (annotation, annotation_line) in &pending_annotations {
1385                refs_push(
1386                    refs,
1387                    &node.id,
1388                    annotation,
1389                    EdgeKind::Decorates,
1390                    file_path,
1391                    language,
1392                    *annotation_line,
1393                    0,
1394                );
1395            }
1396            nodes.push(node);
1397            pending_annotations.clear();
1398            continue;
1399        }
1400
1401        pending_annotations.clear();
1402    }
1403}
1404
/// Builds the stored signature for a Java/Kotlin declaration: one `@Name`
/// line per buffered annotation (in the order given), followed by the
/// declaration text. With no annotations the declaration is returned as is.
fn java_kotlin_signature(annotations: &[(String, i64)], declaration: &str) -> String {
    let mut rendered = String::new();
    for (annotation_name, _line) in annotations {
        rendered.push('@');
        rendered.push_str(annotation_name);
        rendered.push('\n');
    }
    rendered.push_str(declaration);
    rendered
}
1417
1418#[allow(clippy::too_many_arguments)]
1419fn add_java_kotlin_metadata_refs(
1420    node_id: &str,
1421    tail: &str,
1422    annotations: &[(String, i64)],
1423    file_path: &str,
1424    language: Language,
1425    line: i64,
1426    refs: &mut Vec<UnresolvedReference>,
1427) {
1428    let extends_re = Regex::new(r"\bextends\s+([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
1429    let implements_re = Regex::new(r"\bimplements\s+([A-Za-z_][A-Za-z0-9_\.,\s]*)").unwrap();
1430    let kotlin_super_re = Regex::new(r":\s*([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
1431    if let Some(cap) = extends_re
1432        .captures(tail)
1433        .or_else(|| kotlin_super_re.captures(tail))
1434    {
1435        refs_push(
1436            refs,
1437            node_id,
1438            cap.get(1).unwrap().as_str(),
1439            EdgeKind::Extends,
1440            file_path,
1441            language,
1442            line,
1443            0,
1444        );
1445    }
1446    if let Some(cap) = implements_re.captures(tail) {
1447        for name in cap[1]
1448            .split(',')
1449            .map(str::trim)
1450            .filter(|name| !name.is_empty())
1451        {
1452            refs_push(
1453                refs,
1454                node_id,
1455                name,
1456                EdgeKind::Implements,
1457                file_path,
1458                language,
1459                line,
1460                0,
1461            );
1462        }
1463    }
1464    for (annotation, annotation_line) in annotations {
1465        refs_push(
1466            refs,
1467            node_id,
1468            annotation,
1469            EdgeKind::Decorates,
1470            file_path,
1471            language,
1472            *annotation_line,
1473            0,
1474        );
1475    }
1476}
1477
1478fn extract_csharp(
1479    file_path: &str,
1480    source: &str,
1481    now: i64,
1482    nodes: &mut Vec<Node>,
1483    edges: &mut Vec<Edge>,
1484    refs: &mut Vec<UnresolvedReference>,
1485) {
1486    add_csharp_usings(file_path, source, now, nodes, edges, refs);
1487    add_csharp_types_and_members(file_path, source, now, nodes, edges, refs);
1488    add_csharp_call_refs(file_path, source, nodes, refs);
1489}
1490
1491fn add_csharp_usings(
1492    file_path: &str,
1493    source: &str,
1494    now: i64,
1495    nodes: &mut Vec<Node>,
1496    edges: &mut Vec<Edge>,
1497    refs: &mut Vec<UnresolvedReference>,
1498) {
1499    let re = Regex::new(
1500        r"(?m)^\s*using\s+(?:static\s+)?(?:(?:[A-Za-z_][A-Za-z0-9_]*)\s*=\s*)?([A-Za-z_][A-Za-z0-9_\.]*)\s*;",
1501    )
1502    .unwrap();
1503    for cap in re.captures_iter(source) {
1504        let module = cap.get(1).unwrap();
1505        let node = make_node(
1506            file_path,
1507            Language::CSharp,
1508            NodeKind::Import,
1509            module.as_str(),
1510            line_for(source, module.start()),
1511            0,
1512            now,
1513            cap.get(0).map(|m| m.as_str().trim().to_string()),
1514        );
1515        add_contains(nodes, edges, &node);
1516        refs_push(
1517            refs,
1518            &nodes[0].id,
1519            module.as_str(),
1520            EdgeKind::Imports,
1521            file_path,
1522            Language::CSharp,
1523            node.start_line,
1524            0,
1525        );
1526        nodes.push(node);
1527    }
1528}
1529
1530fn add_csharp_types_and_members(
1531    file_path: &str,
1532    source: &str,
1533    now: i64,
1534    nodes: &mut Vec<Node>,
1535    edges: &mut Vec<Edge>,
1536    refs: &mut Vec<UnresolvedReference>,
1537) {
1538    let type_re = Regex::new(
1539        r"^\s*(?:(public|private|protected|internal)\s+)?(?:(?:abstract|sealed|static|partial)\s+)*(class|interface|struct|enum)\s+([A-Za-z_][A-Za-z0-9_]*)([^{]*)\{?",
1540    )
1541    .unwrap();
1542    let method_re = Regex::new(
1543        r"^\s*(?:(public|private|protected|internal)\s+)?((?:static|async|virtual|override|abstract|sealed|partial)\s+)*(?:[A-Za-z_][A-Za-z0-9_<>,\.\?\[\]\s]*\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\([^;{}]*\)\s*(?:where\s+[^{]+)?\{?",
1544    )
1545    .unwrap();
1546    let property_re = Regex::new(
1547        r"^\s*(?:(public|private|protected|internal)\s+)?((?:static|virtual|override|abstract|sealed)\s+)*(?:[A-Za-z_][A-Za-z0-9_<>,\.\?\[\]\s]*\s+)([A-Za-z_][A-Za-z0-9_]*)\s*\{",
1548    )
1549    .unwrap();
1550    let attribute_re = Regex::new(r"^\s*\[\s*([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
1551    let mut type_stack: Vec<(usize, String, String)> = Vec::new();
1552    let mut pending_attributes: Vec<(String, i64)> = Vec::new();
1553
1554    for (idx, line) in source.lines().enumerate() {
1555        let line_no = idx as i64 + 1;
1556        let trimmed = line.trim();
1557        if trimmed.is_empty() || trimmed.starts_with("//") {
1558            continue;
1559        }
1560        let indent = python_indent_width(line);
1561        while type_stack
1562            .last()
1563            .is_some_and(|(type_indent, _, _)| indent <= *type_indent)
1564        {
1565            type_stack.pop();
1566        }
1567
1568        if let Some(cap) = attribute_re.captures(line) {
1569            pending_attributes.push((cap[1].to_string(), line_no));
1570            continue;
1571        }
1572
1573        if let Some(cap) = type_re.captures(line) {
1574            let keyword = cap.get(2).unwrap().as_str();
1575            let kind = match keyword {
1576                "interface" => NodeKind::Interface,
1577                "struct" => NodeKind::Struct,
1578                "enum" => NodeKind::Enum,
1579                _ => NodeKind::Class,
1580            };
1581            let name = cap.get(3).unwrap();
1582            let mut node = make_node(
1583                file_path,
1584                Language::CSharp,
1585                kind,
1586                name.as_str(),
1587                line_no,
1588                indent as i64,
1589                now,
1590                Some(csharp_signature(&pending_attributes, trimmed)),
1591            );
1592            node.visibility = cap
1593                .get(1)
1594                .map(|m| m.as_str().to_string())
1595                .or_else(|| Some("private".to_string()));
1596            node.is_exported = node.visibility.as_deref() == Some("public");
1597            add_contains(nodes, edges, &node);
1598            add_csharp_metadata_refs(
1599                &node.id,
1600                cap.get(4).map(|m| m.as_str()).unwrap_or_default(),
1601                &pending_attributes,
1602                file_path,
1603                line_no,
1604                refs,
1605            );
1606            if !trimmed.contains('}') {
1607                type_stack.push((indent, name.as_str().to_string(), node.id.clone()));
1608            }
1609            nodes.push(node);
1610            pending_attributes.clear();
1611            continue;
1612        }
1613
1614        let member = method_re
1615            .captures(line)
1616            .and_then(|cap| {
1617                let name = cap.get(3).unwrap().as_str();
1618                let skip = matches!(
1619                    name,
1620                    "if" | "for" | "foreach" | "while" | "switch" | "catch" | "return" | "new"
1621                );
1622                (!skip).then(|| {
1623                    (
1624                        NodeKind::Method,
1625                        name.to_string(),
1626                        cap.get(1).map(|m| m.as_str().to_string()),
1627                        cap.get(2)
1628                            .map(|m| m.as_str().to_string())
1629                            .unwrap_or_default(),
1630                        cap.get(0).unwrap().as_str().trim().to_string(),
1631                    )
1632                })
1633            })
1634            .or_else(|| {
1635                property_re.captures(line).map(|cap| {
1636                    (
1637                        NodeKind::Property,
1638                        cap.get(3).unwrap().as_str().to_string(),
1639                        cap.get(1).map(|m| m.as_str().to_string()),
1640                        cap.get(2)
1641                            .map(|m| m.as_str().to_string())
1642                            .unwrap_or_default(),
1643                        cap.get(0).unwrap().as_str().trim().to_string(),
1644                    )
1645                })
1646            });
1647
1648        if let Some((kind, name, visibility, modifiers, signature)) = member {
1649            let mut node = make_node(
1650                file_path,
1651                Language::CSharp,
1652                kind,
1653                &name,
1654                line_no,
1655                indent as i64,
1656                now,
1657                Some(csharp_signature(&pending_attributes, &signature)),
1658            );
1659            node.visibility = visibility.or_else(|| Some("private".to_string()));
1660            node.is_exported = node.visibility.as_deref() == Some("public");
1661            node.is_static = modifiers.contains("static") || signature.contains(" static ");
1662            node.is_async = modifiers.contains("async") || signature.contains(" async ");
1663            if let Some((_, type_name, parent_id)) = type_stack.last() {
1664                node.qualified_name = format!("{}.{}", type_name, name);
1665                edges.push(Edge {
1666                    id: None,
1667                    source: parent_id.clone(),
1668                    target: node.id.clone(),
1669                    kind: EdgeKind::Contains,
1670                    line: None,
1671                    col: None,
1672                    provenance: Some("csharp".into()),
1673                });
1674            } else {
1675                add_contains(nodes, edges, &node);
1676            }
1677            for (attribute, attribute_line) in &pending_attributes {
1678                refs_push(
1679                    refs,
1680                    &node.id,
1681                    attribute,
1682                    EdgeKind::Decorates,
1683                    file_path,
1684                    Language::CSharp,
1685                    *attribute_line,
1686                    0,
1687                );
1688            }
1689            nodes.push(node);
1690            pending_attributes.clear();
1691            continue;
1692        }
1693
1694        pending_attributes.clear();
1695    }
1696}
1697
/// Builds the stored signature for a C# declaration: one `[Name]` line per
/// buffered attribute (in the order given), followed by the declaration
/// text. With no attributes the declaration is returned unchanged.
fn csharp_signature(attributes: &[(String, i64)], declaration: &str) -> String {
    attributes
        .iter()
        .map(|(attribute_name, _line)| format!("[{}]", attribute_name))
        .chain(std::iter::once(declaration.to_string()))
        .collect::<Vec<String>>()
        .join("\n")
}
1710
1711fn add_csharp_metadata_refs(
1712    node_id: &str,
1713    tail: &str,
1714    attributes: &[(String, i64)],
1715    file_path: &str,
1716    line: i64,
1717    refs: &mut Vec<UnresolvedReference>,
1718) {
1719    let base_tail = tail.trim().strip_prefix(':').unwrap_or("").trim();
1720    let mut bases = base_tail
1721        .split(',')
1722        .map(str::trim)
1723        .filter(|name| !name.is_empty())
1724        .map(|name| name.split_whitespace().next().unwrap_or(name));
1725    if let Some(base) = bases.next() {
1726        refs_push(
1727            refs,
1728            node_id,
1729            base,
1730            EdgeKind::Extends,
1731            file_path,
1732            Language::CSharp,
1733            line,
1734            0,
1735        );
1736    }
1737    for name in bases {
1738        refs_push(
1739            refs,
1740            node_id,
1741            name,
1742            EdgeKind::Implements,
1743            file_path,
1744            Language::CSharp,
1745            line,
1746            0,
1747        );
1748    }
1749    for (attribute, attribute_line) in attributes {
1750        refs_push(
1751            refs,
1752            node_id,
1753            attribute,
1754            EdgeKind::Decorates,
1755            file_path,
1756            Language::CSharp,
1757            *attribute_line,
1758            0,
1759        );
1760    }
1761}
1762
1763fn add_csharp_call_refs(
1764    file_path: &str,
1765    source: &str,
1766    nodes: &[Node],
1767    refs: &mut Vec<UnresolvedReference>,
1768) {
1769    let re = Regex::new(r"([A-Za-z_][A-Za-z0-9_\.]*)\s*(?:<[^;\n()]+>)?\s*\(").unwrap();
1770    let keywords = [
1771        "if", "for", "foreach", "while", "switch", "catch", "return", "new", "typeof", "nameof",
1772        "using",
1773    ];
1774    for cap in re.captures_iter(source) {
1775        let name_match = cap.get(1).unwrap();
1776        let name = name_match.as_str();
1777        let line = line_for(source, name_match.start());
1778        let line_text = source
1779            .lines()
1780            .nth(line.saturating_sub(1) as usize)
1781            .unwrap_or_default()
1782            .trim_start();
1783        if keywords.contains(&name)
1784            || line_text.contains(&format!("{name}("))
1785                && matches!(
1786                    line_text.split_whitespace().next(),
1787                    Some("public" | "private" | "protected" | "internal" | "static" | "async")
1788                )
1789        {
1790            continue;
1791        }
1792        if let Some(caller) = nodes
1793            .iter()
1794            .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
1795            .rev()
1796            .find(|n| n.start_line <= line)
1797        {
1798            refs_push(
1799                refs,
1800                &caller.id,
1801                name,
1802                EdgeKind::Calls,
1803                file_path,
1804                Language::CSharp,
1805                line,
1806                0,
1807            );
1808        }
1809    }
1810}
1811
1812fn extract_php_ruby(
1813    file_path: &str,
1814    source: &str,
1815    language: Language,
1816    now: i64,
1817    nodes: &mut Vec<Node>,
1818    edges: &mut Vec<Edge>,
1819    refs: &mut Vec<UnresolvedReference>,
1820) {
1821    match language {
1822        Language::Php => extract_php(file_path, source, now, nodes, edges, refs),
1823        Language::Ruby => extract_ruby(file_path, source, now, nodes, edges, refs),
1824        _ => {}
1825    }
1826}
1827
1828fn extract_php(
1829    file_path: &str,
1830    source: &str,
1831    now: i64,
1832    nodes: &mut Vec<Node>,
1833    edges: &mut Vec<Edge>,
1834    refs: &mut Vec<UnresolvedReference>,
1835) {
1836    add_php_uses(file_path, source, now, nodes, edges, refs);
1837    add_php_symbols(file_path, source, now, nodes, edges, refs);
1838    add_php_call_refs(file_path, source, nodes, refs);
1839}
1840
1841fn add_php_uses(
1842    file_path: &str,
1843    source: &str,
1844    now: i64,
1845    nodes: &mut Vec<Node>,
1846    edges: &mut Vec<Edge>,
1847    refs: &mut Vec<UnresolvedReference>,
1848) {
1849    let single_re =
1850        Regex::new(r"(?m)^\s*use\s+((?:function\s+|const\s+)?[A-Za-z_\\][A-Za-z0-9_\\]*)(?:\s+as\s+[A-Za-z_][A-Za-z0-9_]*)?\s*;")
1851            .unwrap();
1852    for cap in single_re.captures_iter(source) {
1853        let module = cap.get(1).unwrap().as_str().trim();
1854        add_php_import_node(
1855            file_path,
1856            module,
1857            cap.get(0).unwrap().as_str().trim(),
1858            line_for(source, cap.get(1).unwrap().start()),
1859            now,
1860            nodes,
1861            edges,
1862            refs,
1863        );
1864    }
1865
1866    let group_re =
1867        Regex::new(r"(?ms)^\s*use\s+([A-Za-z_\\][A-Za-z0-9_\\]*)\\\s*\{(?P<body>.*?)\}\s*;")
1868            .unwrap();
1869    let item_re =
1870        Regex::new(r"(?m)([A-Za-z_][A-Za-z0-9_]*)(?:\s+as\s+[A-Za-z_][A-Za-z0-9_]*)?").unwrap();
1871    for cap in group_re.captures_iter(source) {
1872        let prefix = cap.get(1).unwrap().as_str();
1873        let Some(body) = cap.name("body") else {
1874            continue;
1875        };
1876        for item in item_re.captures_iter(body.as_str()) {
1877            let leaf = item.get(1).unwrap();
1878            let module = format!("{prefix}\\{}", leaf.as_str());
1879            add_php_import_node(
1880                file_path,
1881                &module,
1882                item.get(0).unwrap().as_str().trim(),
1883                line_for(source, body.start() + leaf.start()),
1884                now,
1885                nodes,
1886                edges,
1887                refs,
1888            );
1889        }
1890    }
1891}
1892
1893#[allow(clippy::too_many_arguments)]
1894fn add_php_import_node(
1895    file_path: &str,
1896    module: &str,
1897    signature: &str,
1898    line: i64,
1899    now: i64,
1900    nodes: &mut Vec<Node>,
1901    edges: &mut Vec<Edge>,
1902    refs: &mut Vec<UnresolvedReference>,
1903) {
1904    let Some(file_id) = nodes.first().map(|node| node.id.clone()) else {
1905        return;
1906    };
1907    let node = make_node(
1908        file_path,
1909        Language::Php,
1910        NodeKind::Import,
1911        module,
1912        line,
1913        0,
1914        now,
1915        Some(signature.to_string()),
1916    );
1917    add_contains(nodes, edges, &node);
1918    refs_push(
1919        refs,
1920        &file_id,
1921        module,
1922        EdgeKind::Imports,
1923        file_path,
1924        Language::Php,
1925        line,
1926        0,
1927    );
1928    nodes.push(node);
1929}
1930
1931fn add_php_symbols(
1932    file_path: &str,
1933    source: &str,
1934    now: i64,
1935    nodes: &mut Vec<Node>,
1936    edges: &mut Vec<Edge>,
1937    refs: &mut Vec<UnresolvedReference>,
1938) {
1939    let type_re = Regex::new(
1940        r"^\s*(?:(abstract|final)\s+)?(class|interface|trait|enum)\s+([A-Za-z_][A-Za-z0-9_]*)([^{]*)\{?",
1941    )
1942    .unwrap();
1943    let function_re = Regex::new(
1944        r"^\s*(?:(public|private|protected)\s+)?((?:static|abstract|final)\s+)*function\s+([A-Za-z_][A-Za-z0-9_]*)\s*\([^)]*\)",
1945    )
1946    .unwrap();
1947    let mut type_stack: Vec<(usize, String, String)> = Vec::new();
1948
1949    for (idx, line) in source.lines().enumerate() {
1950        let line_no = idx as i64 + 1;
1951        let trimmed = line.trim();
1952        if trimmed.is_empty() || trimmed.starts_with("//") || trimmed.starts_with('#') {
1953            continue;
1954        }
1955        let indent = python_indent_width(line);
1956        while type_stack
1957            .last()
1958            .is_some_and(|(type_indent, _, _)| indent <= *type_indent)
1959        {
1960            type_stack.pop();
1961        }
1962
1963        if let Some(cap) = type_re.captures(line) {
1964            let kind = match cap.get(2).unwrap().as_str() {
1965                "interface" => NodeKind::Interface,
1966                "trait" => NodeKind::Trait,
1967                "enum" => NodeKind::Enum,
1968                _ => NodeKind::Class,
1969            };
1970            let name = cap.get(3).unwrap();
1971            let mut node = make_node(
1972                file_path,
1973                Language::Php,
1974                kind,
1975                name.as_str(),
1976                line_no,
1977                indent as i64,
1978                now,
1979                Some(trimmed.to_string()),
1980            );
1981            node.visibility = Some("public".to_string());
1982            node.is_exported = true;
1983            add_contains(nodes, edges, &node);
1984            add_php_inheritance_refs(
1985                &node.id,
1986                cap.get(4).map(|m| m.as_str()).unwrap_or_default(),
1987                file_path,
1988                line_no,
1989                refs,
1990            );
1991            if !trimmed.contains('}') {
1992                type_stack.push((indent, name.as_str().to_string(), node.id.clone()));
1993            }
1994            nodes.push(node);
1995            continue;
1996        }
1997
1998        if let Some(cap) = function_re.captures(line) {
1999            let name = cap.get(3).unwrap().as_str();
2000            let kind = if type_stack.is_empty() {
2001                NodeKind::Function
2002            } else {
2003                NodeKind::Method
2004            };
2005            let mut node = make_node(
2006                file_path,
2007                Language::Php,
2008                kind,
2009                name,
2010                line_no,
2011                indent as i64,
2012                now,
2013                Some(trimmed.to_string()),
2014            );
2015            node.visibility = cap
2016                .get(1)
2017                .map(|m| m.as_str().to_string())
2018                .or_else(|| Some("public".to_string()));
2019            node.is_exported = node.visibility.as_deref() == Some("public");
2020            node.is_static = cap
2021                .get(2)
2022                .map(|m| m.as_str().contains("static"))
2023                .unwrap_or(false)
2024                || trimmed.contains(" static ");
2025            if let Some((_, type_name, parent_id)) = type_stack.last() {
2026                node.qualified_name = format!("{}::{}", type_name, name);
2027                edges.push(Edge {
2028                    id: None,
2029                    source: parent_id.clone(),
2030                    target: node.id.clone(),
2031                    kind: EdgeKind::Contains,
2032                    line: None,
2033                    col: None,
2034                    provenance: Some("php".into()),
2035                });
2036            } else {
2037                add_contains(nodes, edges, &node);
2038            }
2039            nodes.push(node);
2040            continue;
2041        }
2042
2043        if trimmed.starts_with("use ") && !type_stack.is_empty() {
2044            let Some((_, _, parent_id)) = type_stack.last() else {
2045                continue;
2046            };
2047            for name in trimmed
2048                .trim_start_matches("use ")
2049                .trim_end_matches(';')
2050                .split(',')
2051                .map(str::trim)
2052                .filter(|name| !name.is_empty())
2053            {
2054                refs_push(
2055                    refs,
2056                    parent_id,
2057                    name,
2058                    EdgeKind::Implements,
2059                    file_path,
2060                    Language::Php,
2061                    line_no,
2062                    0,
2063                );
2064            }
2065        }
2066    }
2067}
2068
2069fn add_php_inheritance_refs(
2070    node_id: &str,
2071    tail: &str,
2072    file_path: &str,
2073    line: i64,
2074    refs: &mut Vec<UnresolvedReference>,
2075) {
2076    let extends_re = Regex::new(r"\bextends\s+([A-Za-z_\\][A-Za-z0-9_\\]*)").unwrap();
2077    let implements_re = Regex::new(r"\bimplements\s+([A-Za-z_\\][A-Za-z0-9_\\,\s]*)").unwrap();
2078    if let Some(cap) = extends_re.captures(tail) {
2079        refs_push(
2080            refs,
2081            node_id,
2082            cap.get(1).unwrap().as_str(),
2083            EdgeKind::Extends,
2084            file_path,
2085            Language::Php,
2086            line,
2087            0,
2088        );
2089    }
2090    if let Some(cap) = implements_re.captures(tail) {
2091        for name in cap[1]
2092            .split(',')
2093            .map(str::trim)
2094            .filter(|name| !name.is_empty())
2095        {
2096            refs_push(
2097                refs,
2098                node_id,
2099                name,
2100                EdgeKind::Implements,
2101                file_path,
2102                Language::Php,
2103                line,
2104                0,
2105            );
2106        }
2107    }
2108}
2109
2110fn add_php_call_refs(
2111    file_path: &str,
2112    source: &str,
2113    nodes: &[Node],
2114    refs: &mut Vec<UnresolvedReference>,
2115) {
2116    let re = Regex::new(r"([A-Za-z_\\][A-Za-z0-9_\\]*(?:::[A-Za-z_][A-Za-z0-9_]*)?)\s*\(").unwrap();
2117    let keywords = [
2118        "if", "for", "foreach", "while", "switch", "catch", "return", "function",
2119    ];
2120    for cap in re.captures_iter(source) {
2121        let name_match = cap.get(1).unwrap();
2122        let name = name_match.as_str();
2123        let line = line_for(source, name_match.start());
2124        let line_text = source
2125            .lines()
2126            .nth(line.saturating_sub(1) as usize)
2127            .unwrap_or_default()
2128            .trim_start();
2129        if keywords.contains(&name) || line_text.contains(&format!("function {name}(")) {
2130            continue;
2131        }
2132        if let Some(caller) = nodes
2133            .iter()
2134            .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
2135            .rev()
2136            .find(|n| n.start_line <= line)
2137        {
2138            refs_push(
2139                refs,
2140                &caller.id,
2141                name,
2142                EdgeKind::Calls,
2143                file_path,
2144                Language::Php,
2145                line,
2146                0,
2147            );
2148        }
2149    }
2150}
2151
2152fn extract_ruby(
2153    file_path: &str,
2154    source: &str,
2155    now: i64,
2156    nodes: &mut Vec<Node>,
2157    edges: &mut Vec<Edge>,
2158    refs: &mut Vec<UnresolvedReference>,
2159) {
2160    let import_re = Regex::new(r#"^\s*(require|require_relative)\s+["']([^"']+)["']"#).unwrap();
2161    let module_re = Regex::new(r"^\s*module\s+([A-Z][A-Za-z0-9_:]*)").unwrap();
2162    let class_re =
2163        Regex::new(r"^\s*class\s+([A-Z][A-Za-z0-9_:]*)(?:\s*<\s*([A-Z][A-Za-z0-9_:]*))?").unwrap();
2164    let method_re = Regex::new(r"^\s*def\s+(?:(self)\.)?([A-Za-z_][A-Za-z0-9_!?=]*)").unwrap();
2165    let mut stack: Vec<(usize, NodeKind, String, String)> = Vec::new();
2166
2167    for (idx, line) in source.lines().enumerate() {
2168        let line_no = idx as i64 + 1;
2169        let trimmed = line.trim();
2170        if trimmed.is_empty() || trimmed.starts_with('#') {
2171            continue;
2172        }
2173        let indent = python_indent_width(line);
2174        if trimmed == "end" {
2175            stack.pop();
2176            continue;
2177        }
2178
2179        if let Some(cap) = import_re.captures(line) {
2180            let module = cap.get(2).unwrap();
2181            let node = make_node(
2182                file_path,
2183                Language::Ruby,
2184                NodeKind::Import,
2185                module.as_str(),
2186                line_no,
2187                indent as i64,
2188                now,
2189                Some(trimmed.to_string()),
2190            );
2191            add_contains(nodes, edges, &node);
2192            refs_push(
2193                refs,
2194                &nodes[0].id,
2195                module.as_str(),
2196                EdgeKind::Imports,
2197                file_path,
2198                Language::Ruby,
2199                line_no,
2200                0,
2201            );
2202            nodes.push(node);
2203            continue;
2204        }
2205
2206        if let Some(cap) = module_re.captures(line) {
2207            let name = cap.get(1).unwrap();
2208            let node = make_node(
2209                file_path,
2210                Language::Ruby,
2211                NodeKind::Module,
2212                name.as_str(),
2213                line_no,
2214                indent as i64,
2215                now,
2216                Some(trimmed.to_string()),
2217            );
2218            add_contains_from_ruby_stack(nodes, edges, &stack, &node);
2219            stack.push((
2220                indent,
2221                NodeKind::Module,
2222                name.as_str().to_string(),
2223                node.id.clone(),
2224            ));
2225            nodes.push(node);
2226            continue;
2227        }
2228
2229        if let Some(cap) = class_re.captures(line) {
2230            let name = cap.get(1).unwrap();
2231            let node = make_node(
2232                file_path,
2233                Language::Ruby,
2234                NodeKind::Class,
2235                name.as_str(),
2236                line_no,
2237                indent as i64,
2238                now,
2239                Some(trimmed.to_string()),
2240            );
2241            add_contains_from_ruby_stack(nodes, edges, &stack, &node);
2242            if let Some(parent) = cap.get(2) {
2243                refs_push(
2244                    refs,
2245                    &node.id,
2246                    parent.as_str(),
2247                    EdgeKind::Extends,
2248                    file_path,
2249                    Language::Ruby,
2250                    line_no,
2251                    0,
2252                );
2253            }
2254            stack.push((
2255                indent,
2256                NodeKind::Class,
2257                name.as_str().to_string(),
2258                node.id.clone(),
2259            ));
2260            nodes.push(node);
2261            continue;
2262        }
2263
2264        if let Some(cap) = method_re.captures(line) {
2265            let name = cap.get(2).unwrap().as_str();
2266            let mut node = make_node(
2267                file_path,
2268                Language::Ruby,
2269                NodeKind::Method,
2270                name,
2271                line_no,
2272                indent as i64,
2273                now,
2274                Some(trimmed.to_string()),
2275            );
2276            node.visibility = Some("public".to_string());
2277            node.is_exported = true;
2278            node.is_static = cap.get(1).is_some();
2279            if let Some((_, _, owner_name, _)) = stack
2280                .iter()
2281                .rev()
2282                .find(|(_, kind, _, _)| matches!(kind, NodeKind::Class | NodeKind::Module))
2283            {
2284                let sep = if node.is_static { "." } else { "#" };
2285                node.qualified_name = format!("{owner_name}{sep}{name}");
2286            }
2287            add_contains_from_ruby_stack(nodes, edges, &stack, &node);
2288            nodes.push(node);
2289        }
2290    }
2291
2292    add_call_refs(
2293        file_path,
2294        source,
2295        Language::Ruby,
2296        nodes,
2297        refs,
2298        r"([A-Za-z_][A-Za-z0-9_!?=]*)\s*\(",
2299    );
2300}
2301
2302fn add_contains_from_ruby_stack(
2303    nodes: &[Node],
2304    edges: &mut Vec<Edge>,
2305    stack: &[(usize, NodeKind, String, String)],
2306    node: &Node,
2307) {
2308    if let Some((_, _, _, source)) = stack.last() {
2309        edges.push(Edge {
2310            id: None,
2311            source: source.clone(),
2312            target: node.id.clone(),
2313            kind: EdgeKind::Contains,
2314            line: None,
2315            col: None,
2316            provenance: Some("ruby".into()),
2317        });
2318    } else {
2319        add_contains(nodes, edges, node);
2320    }
2321}
2322
2323fn extract_swift(
2324    file_path: &str,
2325    source: &str,
2326    now: i64,
2327    nodes: &mut Vec<Node>,
2328    edges: &mut Vec<Edge>,
2329    refs: &mut Vec<UnresolvedReference>,
2330) {
2331    add_swift_imports(file_path, source, now, nodes, edges, refs);
2332    add_swift_symbols(file_path, source, now, nodes, edges, refs);
2333    add_call_refs(
2334        file_path,
2335        source,
2336        Language::Swift,
2337        nodes,
2338        refs,
2339        r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(",
2340    );
2341}
2342
2343fn add_swift_imports(
2344    file_path: &str,
2345    source: &str,
2346    now: i64,
2347    nodes: &mut Vec<Node>,
2348    edges: &mut Vec<Edge>,
2349    refs: &mut Vec<UnresolvedReference>,
2350) {
2351    let re = Regex::new(r"(?m)^\s*import\s+([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
2352    for cap in re.captures_iter(source) {
2353        let module = cap.get(1).unwrap();
2354        let node = make_node(
2355            file_path,
2356            Language::Swift,
2357            NodeKind::Import,
2358            module.as_str(),
2359            line_for(source, module.start()),
2360            0,
2361            now,
2362            cap.get(0).map(|m| m.as_str().trim().to_string()),
2363        );
2364        add_contains(nodes, edges, &node);
2365        refs_push(
2366            refs,
2367            &nodes[0].id,
2368            module.as_str(),
2369            EdgeKind::Imports,
2370            file_path,
2371            Language::Swift,
2372            node.start_line,
2373            0,
2374        );
2375        nodes.push(node);
2376    }
2377}
2378
2379fn add_swift_symbols(
2380    file_path: &str,
2381    source: &str,
2382    now: i64,
2383    nodes: &mut Vec<Node>,
2384    edges: &mut Vec<Edge>,
2385    refs: &mut Vec<UnresolvedReference>,
2386) {
2387    let type_re = Regex::new(
2388        r"^\s*(?:(public|private|internal|fileprivate|open)\s+)?(?:(?:final|open)\s+)*(class|struct|protocol|enum)\s+([A-Za-z_][A-Za-z0-9_]*)([^{]*)\{?",
2389    )
2390    .unwrap();
2391    let function_re = Regex::new(
2392        r"^\s*(?:(public|private|internal|fileprivate|open)\s+)?((?:static|class|mutating|async|final|override)\s+)*func\s+([A-Za-z_][A-Za-z0-9_]*)\s*\([^)]*\)\s*(?:async\s+)?(?:throws\s+)?(?:->\s*[A-Za-z_][A-Za-z0-9_<>,\.\[\]\?]*)?",
2393    )
2394    .unwrap();
2395    let typealias_re =
2396        Regex::new(r"^\s*(?:(public|private|internal|fileprivate|open)\s+)?typealias\s+([A-Za-z_][A-Za-z0-9_]*)\s*=")
2397            .unwrap();
2398    let mut type_stack: Vec<(usize, String, String)> = Vec::new();
2399
2400    for (idx, line) in source.lines().enumerate() {
2401        let line_no = idx as i64 + 1;
2402        let trimmed = line.trim();
2403        if trimmed.is_empty() || trimmed.starts_with("//") {
2404            continue;
2405        }
2406        let indent = python_indent_width(line);
2407        while type_stack
2408            .last()
2409            .is_some_and(|(type_indent, _, _)| indent <= *type_indent)
2410        {
2411            type_stack.pop();
2412        }
2413
2414        if let Some(cap) = type_re.captures(line) {
2415            let kind = match cap.get(2).unwrap().as_str() {
2416                "struct" => NodeKind::Struct,
2417                "protocol" => NodeKind::Protocol,
2418                "enum" => NodeKind::Enum,
2419                _ => NodeKind::Class,
2420            };
2421            let name = cap.get(3).unwrap();
2422            let mut node = make_node(
2423                file_path,
2424                Language::Swift,
2425                kind,
2426                name.as_str(),
2427                line_no,
2428                indent as i64,
2429                now,
2430                Some(trimmed.to_string()),
2431            );
2432            node.visibility = cap
2433                .get(1)
2434                .map(|m| swift_visibility(m.as_str()).to_string())
2435                .or_else(|| Some("internal".to_string()));
2436            node.is_exported = matches!(node.visibility.as_deref(), Some("public" | "open"));
2437            add_contains(nodes, edges, &node);
2438            add_swift_inheritance_refs(
2439                &node.id,
2440                cap.get(4).map(|m| m.as_str()).unwrap_or_default(),
2441                file_path,
2442                line_no,
2443                refs,
2444            );
2445            if !trimmed.contains('}') {
2446                type_stack.push((indent, name.as_str().to_string(), node.id.clone()));
2447            }
2448            nodes.push(node);
2449            continue;
2450        }
2451
2452        if let Some(cap) = function_re.captures(line) {
2453            let name = cap.get(3).unwrap().as_str();
2454            let kind = if type_stack.is_empty() {
2455                NodeKind::Function
2456            } else {
2457                NodeKind::Method
2458            };
2459            let mut node = make_node(
2460                file_path,
2461                Language::Swift,
2462                kind,
2463                name,
2464                line_no,
2465                indent as i64,
2466                now,
2467                Some(trimmed.to_string()),
2468            );
2469            node.visibility = cap
2470                .get(1)
2471                .map(|m| swift_visibility(m.as_str()).to_string())
2472                .or_else(|| Some("internal".to_string()));
2473            node.is_exported = matches!(node.visibility.as_deref(), Some("public" | "open"));
2474            let modifiers = cap.get(2).map(|m| m.as_str()).unwrap_or_default();
2475            node.is_static = modifiers.contains("static")
2476                || modifiers.contains("class")
2477                || trimmed.contains(" static ")
2478                || trimmed.contains(" class ");
2479            node.is_async = modifiers.contains("async") || trimmed.contains(" async ");
2480            if let Some((_, type_name, parent_id)) = type_stack.last() {
2481                node.qualified_name = format!("{}.{}", type_name, name);
2482                edges.push(Edge {
2483                    id: None,
2484                    source: parent_id.clone(),
2485                    target: node.id.clone(),
2486                    kind: EdgeKind::Contains,
2487                    line: None,
2488                    col: None,
2489                    provenance: Some("swift".into()),
2490                });
2491            } else {
2492                add_contains(nodes, edges, &node);
2493            }
2494            nodes.push(node);
2495            continue;
2496        }
2497
2498        if let Some(cap) = typealias_re.captures(line) {
2499            let name = cap.get(2).unwrap();
2500            let mut node = make_node(
2501                file_path,
2502                Language::Swift,
2503                NodeKind::TypeAlias,
2504                name.as_str(),
2505                line_no,
2506                indent as i64,
2507                now,
2508                Some(trimmed.to_string()),
2509            );
2510            node.visibility = cap
2511                .get(1)
2512                .map(|m| swift_visibility(m.as_str()).to_string())
2513                .or_else(|| Some("internal".to_string()));
2514            node.is_exported = matches!(node.visibility.as_deref(), Some("public" | "open"));
2515            add_contains(nodes, edges, &node);
2516            nodes.push(node);
2517        }
2518    }
2519}
2520
/// Normalises a Swift access-level keyword for storage: `fileprivate` is
/// folded into `private`, every other input is returned unchanged.
fn swift_visibility(visibility: &str) -> &str {
    if visibility == "fileprivate" {
        "private"
    } else {
        visibility
    }
}
2527
2528fn add_swift_inheritance_refs(
2529    node_id: &str,
2530    tail: &str,
2531    file_path: &str,
2532    line: i64,
2533    refs: &mut Vec<UnresolvedReference>,
2534) {
2535    let Some(base_tail) = tail.trim().strip_prefix(':') else {
2536        return;
2537    };
2538    let mut names = base_tail
2539        .split(',')
2540        .map(str::trim)
2541        .filter(|name| !name.is_empty())
2542        .map(|name| name.split_whitespace().next().unwrap_or(name));
2543    if let Some(first) = names.next() {
2544        refs_push(
2545            refs,
2546            node_id,
2547            first,
2548            EdgeKind::Extends,
2549            file_path,
2550            Language::Swift,
2551            line,
2552            0,
2553        );
2554    }
2555    for name in names {
2556        refs_push(
2557            refs,
2558            node_id,
2559            name,
2560            EdgeKind::Implements,
2561            file_path,
2562            Language::Swift,
2563            line,
2564            0,
2565        );
2566    }
2567}
2568
2569fn extract_dart_pascal_scala(
2570    file_path: &str,
2571    source: &str,
2572    language: Language,
2573    now: i64,
2574    nodes: &mut Vec<Node>,
2575    edges: &mut Vec<Edge>,
2576    refs: &mut Vec<UnresolvedReference>,
2577) {
2578    match language {
2579        Language::Dart => extract_dart(file_path, source, now, nodes, edges, refs),
2580        Language::Pascal => extract_pascal(file_path, source, now, nodes, edges, refs),
2581        Language::Scala => extract_scala(file_path, source, now, nodes, edges, refs),
2582        _ => {}
2583    }
2584}
2585
2586fn extract_dart(
2587    file_path: &str,
2588    source: &str,
2589    now: i64,
2590    nodes: &mut Vec<Node>,
2591    edges: &mut Vec<Edge>,
2592    refs: &mut Vec<UnresolvedReference>,
2593) {
2594    let import_re = Regex::new(
2595        r#"(?m)^\s*(?:import|export)\s+['"]([^'"]+)['"](?:\s+as\s+[A-Za-z_][A-Za-z0-9_]*)?\s*;"#,
2596    )
2597    .unwrap();
2598    for cap in import_re.captures_iter(source) {
2599        let module = cap.get(1).unwrap();
2600        let node = make_node(
2601            file_path,
2602            Language::Dart,
2603            NodeKind::Import,
2604            module.as_str(),
2605            line_for(source, module.start()),
2606            0,
2607            now,
2608            cap.get(0).map(|m| m.as_str().trim().to_string()),
2609        );
2610        add_contains(nodes, edges, &node);
2611        refs_push(
2612            refs,
2613            &nodes[0].id,
2614            module.as_str(),
2615            EdgeKind::Imports,
2616            file_path,
2617            Language::Dart,
2618            node.start_line,
2619            0,
2620        );
2621        nodes.push(node);
2622    }
2623
2624    let type_re =
2625        Regex::new(r"^\s*(class|mixin|extension|enum)\s+([A-Za-z_][A-Za-z0-9_]*)([^{]*)\{?")
2626            .unwrap();
2627    let function_re = Regex::new(
2628        r"^\s*(?:static\s+)?(?:[A-Za-z_][A-Za-z0-9_<>,\?\[\]]*\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\([^;{}]*\)\s*(?:async\s*)?\{?",
2629    )
2630    .unwrap();
2631    let typealias_re = Regex::new(r"^\s*typedef\s+([A-Za-z_][A-Za-z0-9_]*)\s*=").unwrap();
2632    let mut type_stack: Vec<(usize, String, String)> = Vec::new();
2633    for (idx, line) in source.lines().enumerate() {
2634        let line_no = idx as i64 + 1;
2635        let trimmed = line.trim();
2636        if trimmed.is_empty() || trimmed.starts_with("//") {
2637            continue;
2638        }
2639        let indent = python_indent_width(line);
2640        while type_stack
2641            .last()
2642            .is_some_and(|(type_indent, _, _)| indent <= *type_indent)
2643        {
2644            type_stack.pop();
2645        }
2646        if let Some(cap) = type_re.captures(line) {
2647            let kind = match cap.get(1).unwrap().as_str() {
2648                "enum" => NodeKind::Enum,
2649                "mixin" | "extension" => NodeKind::Trait,
2650                _ => NodeKind::Class,
2651            };
2652            let name = cap.get(2).unwrap();
2653            let mut node = make_node(
2654                file_path,
2655                Language::Dart,
2656                kind,
2657                name.as_str(),
2658                line_no,
2659                indent as i64,
2660                now,
2661                Some(trimmed.to_string()),
2662            );
2663            node.visibility = Some(dart_visibility(name.as_str()).to_string());
2664            node.is_exported = node.visibility.as_deref() == Some("public");
2665            add_contains(nodes, edges, &node);
2666            add_dart_inheritance_refs(
2667                &node.id,
2668                cap.get(3).map(|m| m.as_str()).unwrap_or_default(),
2669                file_path,
2670                line_no,
2671                refs,
2672            );
2673            if !trimmed.contains('}') {
2674                type_stack.push((indent, name.as_str().to_string(), node.id.clone()));
2675            }
2676            nodes.push(node);
2677            continue;
2678        }
2679        if let Some(cap) = typealias_re.captures(line) {
2680            let name = cap.get(1).unwrap();
2681            let mut node = make_node(
2682                file_path,
2683                Language::Dart,
2684                NodeKind::TypeAlias,
2685                name.as_str(),
2686                line_no,
2687                indent as i64,
2688                now,
2689                Some(trimmed.to_string()),
2690            );
2691            node.visibility = Some(dart_visibility(name.as_str()).to_string());
2692            node.is_exported = node.visibility.as_deref() == Some("public");
2693            add_contains(nodes, edges, &node);
2694            nodes.push(node);
2695            continue;
2696        }
2697        if let Some(cap) = function_re.captures(line) {
2698            let name = cap.get(1).unwrap().as_str();
2699            if matches!(name, "if" | "for" | "while" | "switch" | "return") {
2700                continue;
2701            }
2702            if !trimmed.contains('{') {
2703                continue;
2704            }
2705            let kind = if type_stack.is_empty() {
2706                NodeKind::Function
2707            } else {
2708                NodeKind::Method
2709            };
2710            let mut node = make_node(
2711                file_path,
2712                Language::Dart,
2713                kind,
2714                name,
2715                line_no,
2716                indent as i64,
2717                now,
2718                Some(trimmed.to_string()),
2719            );
2720            node.visibility = Some(dart_visibility(name).to_string());
2721            node.is_exported = node.visibility.as_deref() == Some("public");
2722            node.is_static = trimmed.starts_with("static ") || trimmed.contains(" static ");
2723            node.is_async = trimmed.contains(" async");
2724            if let Some((_, type_name, parent_id)) = type_stack.last() {
2725                node.qualified_name = format!("{}.{}", type_name, name);
2726                edges.push(Edge {
2727                    id: None,
2728                    source: parent_id.clone(),
2729                    target: node.id.clone(),
2730                    kind: EdgeKind::Contains,
2731                    line: None,
2732                    col: None,
2733                    provenance: Some("dart".into()),
2734                });
2735            } else {
2736                add_contains(nodes, edges, &node);
2737            }
2738            nodes.push(node);
2739        }
2740    }
2741    add_call_refs(
2742        file_path,
2743        source,
2744        Language::Dart,
2745        nodes,
2746        refs,
2747        r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(",
2748    );
2749}
2750
/// Returns the visibility implied by a Dart identifier: `"private"` when the
/// name begins with an underscore, `"public"` otherwise.
fn dart_visibility(name: &str) -> &str {
    match name.chars().next() {
        Some('_') => "private",
        _ => "public",
    }
}
2758
2759fn add_dart_inheritance_refs(
2760    node_id: &str,
2761    tail: &str,
2762    file_path: &str,
2763    line: i64,
2764    refs: &mut Vec<UnresolvedReference>,
2765) {
2766    let extends_re = Regex::new(r"\bextends\s+([A-Za-z_][A-Za-z0-9_]*)").unwrap();
2767    let implements_re = Regex::new(r"\bimplements\s+([A-Za-z_][A-Za-z0-9_,\s]*)").unwrap();
2768    let with_re = Regex::new(r"\bwith\s+([A-Za-z_][A-Za-z0-9_,\s]*)").unwrap();
2769    let on_re = Regex::new(r"\bon\s+([A-Za-z_][A-Za-z0-9_,\s]*)").unwrap();
2770    if let Some(cap) = extends_re.captures(tail).or_else(|| on_re.captures(tail)) {
2771        refs_push(
2772            refs,
2773            node_id,
2774            cap.get(1).unwrap().as_str(),
2775            EdgeKind::Extends,
2776            file_path,
2777            Language::Dart,
2778            line,
2779            0,
2780        );
2781    }
2782    for re in [&implements_re, &with_re] {
2783        if let Some(cap) = re.captures(tail) {
2784            for name in cap[1]
2785                .split(',')
2786                .map(str::trim)
2787                .filter(|name| !name.is_empty())
2788            {
2789                refs_push(
2790                    refs,
2791                    node_id,
2792                    name,
2793                    EdgeKind::Implements,
2794                    file_path,
2795                    Language::Dart,
2796                    line,
2797                    0,
2798                );
2799            }
2800        }
2801    }
2802}
2803
2804fn extract_pascal(
2805    file_path: &str,
2806    source: &str,
2807    now: i64,
2808    nodes: &mut Vec<Node>,
2809    edges: &mut Vec<Edge>,
2810    refs: &mut Vec<UnresolvedReference>,
2811) {
2812    let unit_re = Regex::new(r"(?i)^\s*unit\s+([A-Za-z_][A-Za-z0-9_]*)\s*;").unwrap();
2813    let uses_re = Regex::new(r"(?i)^\s*uses\s+([^;]+);").unwrap();
2814    let class_re =
2815        Regex::new(r"(?i)^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=\s*class(?:\(([^)]*)\))?").unwrap();
2816    let proc_re = Regex::new(
2817        r"(?i)^\s*(?:class\s+)?(procedure|function)\s+([A-Za-z_][A-Za-z0-9_\.]*)\s*(?:\([^;]*\))?\s*(?::\s*[A-Za-z_][A-Za-z0-9_]*)?\s*;",
2818    )
2819    .unwrap();
2820    let mut current_class: Option<(String, String)> = None;
2821
2822    for (idx, line) in source.lines().enumerate() {
2823        let line_no = idx as i64 + 1;
2824        let trimmed = line.trim();
2825        if trimmed.is_empty() || trimmed.starts_with("//") {
2826            continue;
2827        }
2828        if let Some(cap) = unit_re.captures(line) {
2829            let name = cap.get(1).unwrap();
2830            let node = make_node(
2831                file_path,
2832                Language::Pascal,
2833                NodeKind::Module,
2834                name.as_str(),
2835                line_no,
2836                0,
2837                now,
2838                Some(trimmed.to_string()),
2839            );
2840            add_contains(nodes, edges, &node);
2841            nodes.push(node);
2842            continue;
2843        }
2844        if let Some(cap) = uses_re.captures(line) {
2845            for module in cap[1].split(',').map(str::trim).filter(|m| !m.is_empty()) {
2846                let node = make_node(
2847                    file_path,
2848                    Language::Pascal,
2849                    NodeKind::Import,
2850                    module,
2851                    line_no,
2852                    0,
2853                    now,
2854                    Some(trimmed.to_string()),
2855                );
2856                add_contains(nodes, edges, &node);
2857                refs_push(
2858                    refs,
2859                    &nodes[0].id,
2860                    module,
2861                    EdgeKind::Imports,
2862                    file_path,
2863                    Language::Pascal,
2864                    line_no,
2865                    0,
2866                );
2867                nodes.push(node);
2868            }
2869            continue;
2870        }
2871        if let Some(cap) = class_re.captures(line) {
2872            let name = cap.get(1).unwrap();
2873            let node = make_node(
2874                file_path,
2875                Language::Pascal,
2876                NodeKind::Class,
2877                name.as_str(),
2878                line_no,
2879                0,
2880                now,
2881                Some(trimmed.to_string()),
2882            );
2883            add_contains(nodes, edges, &node);
2884            if let Some(parent) = cap.get(2) {
2885                refs_push(
2886                    refs,
2887                    &node.id,
2888                    parent.as_str().trim(),
2889                    EdgeKind::Extends,
2890                    file_path,
2891                    Language::Pascal,
2892                    line_no,
2893                    0,
2894                );
2895            }
2896            current_class = Some((name.as_str().to_string(), node.id.clone()));
2897            nodes.push(node);
2898            continue;
2899        }
2900        if trimmed.eq_ignore_ascii_case("end;") {
2901            current_class = None;
2902            continue;
2903        }
2904        if let Some(cap) = proc_re.captures(line) {
2905            let raw_name = cap.get(2).unwrap().as_str();
2906            let name = raw_name.rsplit('.').next().unwrap_or(raw_name);
2907            let kind = if raw_name.contains('.') || current_class.is_some() {
2908                NodeKind::Method
2909            } else {
2910                NodeKind::Function
2911            };
2912            let mut node = make_node(
2913                file_path,
2914                Language::Pascal,
2915                kind,
2916                name,
2917                line_no,
2918                0,
2919                now,
2920                Some(trimmed.to_string()),
2921            );
2922            node.is_static = trimmed.to_lowercase().starts_with("class ");
2923            if raw_name.contains('.') {
2924                let owner = raw_name
2925                    .rsplit_once('.')
2926                    .map(|(owner, _)| owner)
2927                    .unwrap_or(raw_name);
2928                node.qualified_name = format!("{}.{}", owner, name);
2929            } else if let Some((owner, parent_id)) = &current_class {
2930                node.qualified_name = format!("{}.{}", owner, name);
2931                edges.push(Edge {
2932                    id: None,
2933                    source: parent_id.clone(),
2934                    target: node.id.clone(),
2935                    kind: EdgeKind::Contains,
2936                    line: None,
2937                    col: None,
2938                    provenance: Some("pascal".into()),
2939                });
2940                nodes.push(node);
2941                continue;
2942            }
2943            add_contains(nodes, edges, &node);
2944            nodes.push(node);
2945        }
2946    }
2947    add_call_refs(
2948        file_path,
2949        source,
2950        Language::Pascal,
2951        nodes,
2952        refs,
2953        r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(",
2954    );
2955    add_pascal_bare_call_refs(file_path, source, nodes, refs);
2956}
2957
2958fn add_pascal_bare_call_refs(
2959    file_path: &str,
2960    source: &str,
2961    nodes: &[Node],
2962    refs: &mut Vec<UnresolvedReference>,
2963) {
2964    let re = Regex::new(r"(?im)^\s*([A-Za-z_][A-Za-z0-9_]*\.[A-Za-z_][A-Za-z0-9_]*)\s*;").unwrap();
2965    for cap in re.captures_iter(source) {
2966        let name = cap.get(1).unwrap();
2967        let line = line_for(source, name.start());
2968        if let Some(caller) = nodes
2969            .iter()
2970            .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
2971            .rev()
2972            .find(|n| n.start_line <= line)
2973        {
2974            refs_push(
2975                refs,
2976                &caller.id,
2977                name.as_str(),
2978                EdgeKind::Calls,
2979                file_path,
2980                Language::Pascal,
2981                line,
2982                0,
2983            );
2984        }
2985    }
2986}
2987
2988fn extract_scala(
2989    file_path: &str,
2990    source: &str,
2991    now: i64,
2992    nodes: &mut Vec<Node>,
2993    edges: &mut Vec<Edge>,
2994    refs: &mut Vec<UnresolvedReference>,
2995) {
2996    let import_re = Regex::new(r"(?m)^\s*import\s+([A-Za-z_][A-Za-z0-9_\.\{\}, \t]*)").unwrap();
2997    for cap in import_re.captures_iter(source) {
2998        let module = cap.get(1).unwrap().as_str().trim();
2999        let node = make_node(
3000            file_path,
3001            Language::Scala,
3002            NodeKind::Import,
3003            module,
3004            line_for(source, cap.get(1).unwrap().start()),
3005            0,
3006            now,
3007            cap.get(0).map(|m| m.as_str().trim().to_string()),
3008        );
3009        add_contains(nodes, edges, &node);
3010        refs_push(
3011            refs,
3012            &nodes[0].id,
3013            module,
3014            EdgeKind::Imports,
3015            file_path,
3016            Language::Scala,
3017            node.start_line,
3018            0,
3019        );
3020        nodes.push(node);
3021    }
3022
3023    let type_re = Regex::new(
3024        r"^\s*(?:(private|protected)\s+)?(class|object|trait|enum)\s+([A-Za-z_][A-Za-z0-9_]*)([^{=]*)",
3025    )
3026    .unwrap();
3027    let def_re = Regex::new(
3028        r"^\s*(?:(private|protected)\s+)?(?:override\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)\s*\([^)]*\)\s*(?::\s*[A-Za-z_][A-Za-z0-9_\[\],\s]*)?",
3029    )
3030    .unwrap();
3031    let typealias_re = Regex::new(r"^\s*type\s+([A-Za-z_][A-Za-z0-9_]*)\s*=").unwrap();
3032    let mut type_stack: Vec<(usize, String, String)> = Vec::new();
3033    for (idx, line) in source.lines().enumerate() {
3034        let line_no = idx as i64 + 1;
3035        let trimmed = line.trim();
3036        if trimmed.is_empty() || trimmed.starts_with("//") {
3037            continue;
3038        }
3039        let indent = python_indent_width(line);
3040        while type_stack
3041            .last()
3042            .is_some_and(|(type_indent, _, _)| indent <= *type_indent)
3043        {
3044            type_stack.pop();
3045        }
3046        if let Some(cap) = type_re.captures(line) {
3047            let kind = match cap.get(2).unwrap().as_str() {
3048                "trait" => NodeKind::Trait,
3049                "enum" => NodeKind::Enum,
3050                "object" => NodeKind::Module,
3051                _ => NodeKind::Class,
3052            };
3053            let name = cap.get(3).unwrap();
3054            let mut node = make_node(
3055                file_path,
3056                Language::Scala,
3057                kind,
3058                name.as_str(),
3059                line_no,
3060                indent as i64,
3061                now,
3062                Some(trimmed.to_string()),
3063            );
3064            node.visibility = cap
3065                .get(1)
3066                .map(|m| m.as_str().to_string())
3067                .or_else(|| Some("public".to_string()));
3068            node.is_exported = node.visibility.as_deref() == Some("public");
3069            add_contains(nodes, edges, &node);
3070            add_scala_inheritance_refs(
3071                &node.id,
3072                cap.get(4).map(|m| m.as_str()).unwrap_or_default(),
3073                file_path,
3074                line_no,
3075                refs,
3076            );
3077            if !trimmed.contains('}') {
3078                type_stack.push((indent, name.as_str().to_string(), node.id.clone()));
3079            }
3080            nodes.push(node);
3081            continue;
3082        }
3083        if let Some(cap) = def_re.captures(line) {
3084            let name = cap.get(2).unwrap().as_str();
3085            let kind = if type_stack.is_empty() {
3086                NodeKind::Function
3087            } else {
3088                NodeKind::Method
3089            };
3090            let mut node = make_node(
3091                file_path,
3092                Language::Scala,
3093                kind,
3094                name,
3095                line_no,
3096                indent as i64,
3097                now,
3098                Some(trimmed.to_string()),
3099            );
3100            node.visibility = cap
3101                .get(1)
3102                .map(|m| m.as_str().to_string())
3103                .or_else(|| Some("public".to_string()));
3104            node.is_exported = node.visibility.as_deref() == Some("public");
3105            if let Some((_, owner, parent_id)) = type_stack.last() {
3106                node.qualified_name = format!("{}.{}", owner, name);
3107                edges.push(Edge {
3108                    id: None,
3109                    source: parent_id.clone(),
3110                    target: node.id.clone(),
3111                    kind: EdgeKind::Contains,
3112                    line: None,
3113                    col: None,
3114                    provenance: Some("scala".into()),
3115                });
3116            } else {
3117                add_contains(nodes, edges, &node);
3118            }
3119            nodes.push(node);
3120            continue;
3121        }
3122        if let Some(cap) = typealias_re.captures(line) {
3123            let name = cap.get(1).unwrap();
3124            let node = make_node(
3125                file_path,
3126                Language::Scala,
3127                NodeKind::TypeAlias,
3128                name.as_str(),
3129                line_no,
3130                indent as i64,
3131                now,
3132                Some(trimmed.to_string()),
3133            );
3134            add_contains(nodes, edges, &node);
3135            nodes.push(node);
3136        }
3137    }
3138    add_call_refs(
3139        file_path,
3140        source,
3141        Language::Scala,
3142        nodes,
3143        refs,
3144        r"([A-Za-z_][A-Za-z0-9_\.]*)\s*\(",
3145    );
3146}
3147
3148fn add_scala_inheritance_refs(
3149    node_id: &str,
3150    tail: &str,
3151    file_path: &str,
3152    line: i64,
3153    refs: &mut Vec<UnresolvedReference>,
3154) {
3155    let extends_re = Regex::new(r"\bextends\s+([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
3156    let with_re = Regex::new(r"\bwith\s+([A-Za-z_][A-Za-z0-9_\.]*)").unwrap();
3157    if let Some(cap) = extends_re.captures(tail) {
3158        refs_push(
3159            refs,
3160            node_id,
3161            cap.get(1).unwrap().as_str(),
3162            EdgeKind::Extends,
3163            file_path,
3164            Language::Scala,
3165            line,
3166            0,
3167        );
3168    }
3169    for cap in with_re.captures_iter(tail) {
3170        refs_push(
3171            refs,
3172            node_id,
3173            cap.get(1).unwrap().as_str(),
3174            EdgeKind::Implements,
3175            file_path,
3176            Language::Scala,
3177            line,
3178            0,
3179        );
3180    }
3181}
3182
3183fn extract_rust(
3184    file_path: &str,
3185    source: &str,
3186    now: i64,
3187    nodes: &mut Vec<Node>,
3188    edges: &mut Vec<Edge>,
3189    refs: &mut Vec<UnresolvedReference>,
3190) {
3191    if try_extract_rust_tree_sitter(file_path, source, now, nodes, edges, refs) {
3192        return;
3193    }
3194
3195    add_regex_nodes(
3196        file_path,
3197        source,
3198        Language::Rust,
3199        now,
3200        nodes,
3201        edges,
3202        r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\s*([^{;]*)",
3203        NodeKind::Function,
3204    );
3205    add_regex_nodes(
3206        file_path,
3207        source,
3208        Language::Rust,
3209        now,
3210        nodes,
3211        edges,
3212        r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?struct\s+([A-Za-z_][A-Za-z0-9_]*)",
3213        NodeKind::Struct,
3214    );
3215    add_regex_nodes(
3216        file_path,
3217        source,
3218        Language::Rust,
3219        now,
3220        nodes,
3221        edges,
3222        r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?trait\s+([A-Za-z_][A-Za-z0-9_]*)",
3223        NodeKind::Trait,
3224    );
3225    add_regex_nodes(
3226        file_path,
3227        source,
3228        Language::Rust,
3229        now,
3230        nodes,
3231        edges,
3232        r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?enum\s+([A-Za-z_][A-Za-z0-9_]*)",
3233        NodeKind::Enum,
3234    );
3235    add_regex_nodes(
3236        file_path,
3237        source,
3238        Language::Rust,
3239        now,
3240        nodes,
3241        edges,
3242        r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?type\s+([A-Za-z_][A-Za-z0-9_]*)",
3243        NodeKind::TypeAlias,
3244    );
3245
3246    let use_re = Regex::new(r"(?m)^\s*use\s+([^;]+);").unwrap();
3247    for cap in use_re.captures_iter(source) {
3248        let full = cap.get(1).unwrap();
3249        let root = full
3250            .as_str()
3251            .split("::")
3252            .next()
3253            .unwrap_or(full.as_str())
3254            .trim_matches('{')
3255            .trim();
3256        let node = make_node(
3257            file_path,
3258            Language::Rust,
3259            NodeKind::Import,
3260            root,
3261            line_for(source, full.start()),
3262            0,
3263            now,
3264            Some(format!("use {};", full.as_str())),
3265        );
3266        add_contains(nodes, edges, &node);
3267        refs.push(unresolved(
3268            &nodes[0].id,
3269            root,
3270            EdgeKind::Imports,
3271            file_path,
3272            Language::Rust,
3273            node.start_line,
3274        ));
3275        nodes.push(node);
3276    }
3277
3278    let impl_re = Regex::new(
3279        r"(?m)^\s*impl(?:<[^>]+>)?\s+([A-Za-z_][A-Za-z0-9_:]*)\s+for\s+([A-Za-z_][A-Za-z0-9_]*)",
3280    )
3281    .unwrap();
3282    for cap in impl_re.captures_iter(source) {
3283        let trait_name = cap.get(1).unwrap().as_str().rsplit("::").next().unwrap();
3284        let type_name = cap.get(2).unwrap().as_str();
3285        if let Some(src) = nodes
3286            .iter()
3287            .find(|n| n.name == type_name && matches!(n.kind, NodeKind::Struct | NodeKind::Enum))
3288            .map(|n| n.id.clone())
3289        {
3290            refs.push(unresolved(
3291                &src,
3292                trait_name,
3293                EdgeKind::Implements,
3294                file_path,
3295                Language::Rust,
3296                line_for(source, cap.get(1).unwrap().start()),
3297            ));
3298        }
3299    }
3300    add_call_refs(
3301        file_path,
3302        source,
3303        Language::Rust,
3304        nodes,
3305        refs,
3306        r"([A-Za-z_][A-Za-z0-9_:]*)\s*\(",
3307    );
3308}
3309
3310fn extract_moonbit(
3311    file_path: &str,
3312    source: &str,
3313    now: i64,
3314    nodes: &mut Vec<Node>,
3315    edges: &mut Vec<Edge>,
3316    refs: &mut Vec<UnresolvedReference>,
3317) {
3318    if file_path.ends_with("moon.mod.json")
3319        || file_path.ends_with("moon.pkg.json")
3320        || file_path.ends_with("moon.pkg")
3321    {
3322        extract_moonbit_metadata(file_path, source, now, nodes, edges, refs);
3323        return;
3324    }
3325
3326    let source = if file_path.ends_with(".mbt.md") {
3327        extract_mbt_markdown_code_with_padding(source)
3328    } else {
3329        source.to_string()
3330    };
3331
3332    if try_extract_moonbit_tree_sitter(file_path, &source, now, nodes, edges, refs) {
3333        extract_moonbit_sol_routes(file_path, &source, now, nodes, edges, refs);
3334        return;
3335    }
3336
3337    add_regex_nodes(
3338        file_path,
3339        &source,
3340        Language::MoonBit,
3341        now,
3342        nodes,
3343        edges,
3344        r"(?m)^\s*(pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\s*([^{]*)",
3345        NodeKind::Function,
3346    );
3347    add_regex_nodes(
3348        file_path,
3349        &source,
3350        Language::MoonBit,
3351        now,
3352        nodes,
3353        edges,
3354        r"(?m)^\s*(pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*::[A-Za-z_][A-Za-z0-9_]*)\s*([^{]*)",
3355        NodeKind::Method,
3356    );
3357    add_regex_nodes(
3358        file_path,
3359        &source,
3360        Language::MoonBit,
3361        now,
3362        nodes,
3363        edges,
3364        r"(?m)^\s*(pub\s+)?struct\s+([A-Za-z_][A-Za-z0-9_]*)",
3365        NodeKind::Struct,
3366    );
3367    add_regex_nodes(
3368        file_path,
3369        &source,
3370        Language::MoonBit,
3371        now,
3372        nodes,
3373        edges,
3374        r"(?m)^\s*(pub\s+)?trait\s+([A-Za-z_][A-Za-z0-9_]*)",
3375        NodeKind::Trait,
3376    );
3377    add_regex_nodes(
3378        file_path,
3379        &source,
3380        Language::MoonBit,
3381        now,
3382        nodes,
3383        edges,
3384        r"(?m)^\s*(pub\s+)?enum\s+([A-Za-z_][A-Za-z0-9_]*)",
3385        NodeKind::Enum,
3386    );
3387    add_regex_nodes(
3388        file_path,
3389        &source,
3390        Language::MoonBit,
3391        now,
3392        nodes,
3393        edges,
3394        r"(?m)^\s*(pub\s+)?type\s+([A-Za-z_][A-Za-z0-9_]*)",
3395        NodeKind::TypeAlias,
3396    );
3397    add_regex_nodes(
3398        file_path,
3399        &source,
3400        Language::MoonBit,
3401        now,
3402        nodes,
3403        edges,
3404        r"(?m)^\s*(pub\s+)?let\s+([A-Za-z_][A-Za-z0-9_]*)",
3405        NodeKind::Variable,
3406    );
3407
3408    let import_re =
3409        Regex::new(r#"(?m)^\s*import\s+([@\w/.\-]+)(?:\s+as\s+([A-Za-z_][A-Za-z0-9_]*))?"#)
3410            .unwrap();
3411    for cap in import_re.captures_iter(&source) {
3412        let package = cap.get(1).unwrap().as_str();
3413        let name = cap.get(2).map(|m| m.as_str()).unwrap_or(package);
3414        let node = make_node(
3415            file_path,
3416            Language::MoonBit,
3417            NodeKind::Import,
3418            name,
3419            line_for(&source, cap.get(0).unwrap().start()),
3420            0,
3421            now,
3422            Some(cap.get(0).unwrap().as_str().to_string()),
3423        );
3424        add_contains(nodes, edges, &node);
3425        refs.push(unresolved(
3426            &nodes[0].id,
3427            name,
3428            EdgeKind::Imports,
3429            file_path,
3430            Language::MoonBit,
3431            node.start_line,
3432        ));
3433        nodes.push(node);
3434    }
3435    add_call_refs(
3436        file_path,
3437        &source,
3438        Language::MoonBit,
3439        nodes,
3440        refs,
3441        r"([@A-Za-z_][@A-Za-z0-9_:/]*)\s*\(",
3442    );
3443    extract_moonbit_sol_routes(file_path, &source, now, nodes, edges, refs);
3444}
3445
3446fn extract_moonbit_sol_routes(
3447    file_path: &str,
3448    source: &str,
3449    now: i64,
3450    nodes: &mut Vec<Node>,
3451    edges: &mut Vec<Edge>,
3452    refs: &mut Vec<UnresolvedReference>,
3453) {
3454    if !file_path.ends_with(".mbt") && !file_path.ends_with(".mbt.md") {
3455        return;
3456    }
3457
3458    let safe = strip_moonbit_comments_preserve_lines(source);
3459    let call_re = Regex::new(
3460        r#"@(?:sol|router)\.(route|page|api_get|api_post|api_put|api_delete|api_patch|raw_get|raw_post|raw_put|raw_delete|raw_patch)\s*\(\s*"([^"]+)"\s*,\s*([@A-Za-z_][@A-Za-z0-9_:.]*)"#,
3461    )
3462    .unwrap();
3463    let wrap_re = Regex::new(r#"@(?:sol|router)\.wrap\s*\(\s*"([^"]*)"\s*,"#).unwrap();
3464    let constructor_re = Regex::new(
3465        r#"SolRoutes::(Page|RawGet|RawPost|RawPut|RawDelete|RawPatch)\s*\([^)]*path\s*=\s*"([^"]+)"[^)]*handler\s*=\s*(?:PageHandler|RawHandler)?\(?\s*([@A-Za-z_][@A-Za-z0-9_:.]*)"#,
3466    )
3467    .unwrap();
3468    let named_page_re = Regex::new(
3469        r#"@(?:sol|router)\.page\s*\([^)]*path\s*=\s*"([^"]+)"[^)]*handler\s*=\s*([@A-Za-z_][@A-Za-z0-9_:.]*)"#,
3470    )
3471    .unwrap();
3472
3473    let mut prefix_stack: Vec<(usize, String)> = Vec::new();
3474    let mut byte_offset = 0usize;
3475    for line in safe.lines() {
3476        let indent = line.chars().take_while(|c| c.is_whitespace()).count();
3477        while prefix_stack
3478            .last()
3479            .map(|(stack_indent, _)| indent <= *stack_indent && line.trim_start().starts_with(']'))
3480            .unwrap_or(false)
3481        {
3482            prefix_stack.pop();
3483        }
3484
3485        if let Some(cap) = wrap_re.captures(line) {
3486            let prefix = cap.get(1).map(|m| m.as_str()).unwrap_or("");
3487            let full_prefix = join_route_paths(current_route_prefix(&prefix_stack), prefix);
3488            prefix_stack.push((indent, full_prefix));
3489        }
3490
3491        for cap in call_re.captures_iter(line) {
3492            let helper = cap.get(1).unwrap().as_str();
3493            let path = cap.get(2).unwrap().as_str();
3494            let handler = cap.get(3).map(|m| clean_moonbit_handler(m.as_str()));
3495            let route_path = join_route_paths(current_route_prefix(&prefix_stack), path);
3496            add_moonbit_route_node(
3497                file_path,
3498                &safe,
3499                byte_offset + cap.get(0).unwrap().start(),
3500                helper_route_method(helper),
3501                &route_path,
3502                handler.as_deref(),
3503                now,
3504                nodes,
3505                edges,
3506                refs,
3507            );
3508        }
3509
3510        for cap in named_page_re.captures_iter(line) {
3511            let path = cap.get(1).unwrap().as_str();
3512            let handler = cap.get(2).map(|m| clean_moonbit_handler(m.as_str()));
3513            let route_path = join_route_paths(current_route_prefix(&prefix_stack), path);
3514            add_moonbit_route_node(
3515                file_path,
3516                &safe,
3517                byte_offset + cap.get(0).unwrap().start(),
3518                "PAGE",
3519                &route_path,
3520                handler.as_deref(),
3521                now,
3522                nodes,
3523                edges,
3524                refs,
3525            );
3526        }
3527
3528        for cap in constructor_re.captures_iter(line) {
3529            let variant = cap.get(1).unwrap().as_str();
3530            let path = cap.get(2).unwrap().as_str();
3531            let handler = cap.get(3).map(|m| clean_moonbit_handler(m.as_str()));
3532            let route_path = join_route_paths(current_route_prefix(&prefix_stack), path);
3533            add_moonbit_route_node(
3534                file_path,
3535                &safe,
3536                byte_offset + cap.get(0).unwrap().start(),
3537                constructor_route_method(variant),
3538                &route_path,
3539                handler.as_deref(),
3540                now,
3541                nodes,
3542                edges,
3543                refs,
3544            );
3545        }
3546
3547        byte_offset += line.len() + 1;
3548    }
3549}
3550
3551fn add_moonbit_route_node(
3552    file_path: &str,
3553    source: &str,
3554    byte_offset: usize,
3555    method: &str,
3556    route_path: &str,
3557    handler: Option<&str>,
3558    now: i64,
3559    nodes: &mut Vec<Node>,
3560    edges: &mut Vec<Edge>,
3561    refs: &mut Vec<UnresolvedReference>,
3562) {
3563    let line = line_for(source, byte_offset);
3564    let name = format!("{method} {route_path}");
3565    let node = Node {
3566        id: format!("route:{file_path}:{line}:{method}:{route_path}"),
3567        kind: NodeKind::Route,
3568        name,
3569        qualified_name: format!("{file_path}::route:{method}:{route_path}"),
3570        file_path: file_path.to_string(),
3571        language: Language::MoonBit,
3572        start_line: line,
3573        end_line: line,
3574        start_column: 0,
3575        end_column: 0,
3576        docstring: None,
3577        signature: handler.map(|h| format!("{method} {route_path} -> {h}")),
3578        visibility: None,
3579        is_exported: false,
3580        is_async: false,
3581        is_static: false,
3582        is_abstract: false,
3583        updated_at: now,
3584    };
3585    add_contains(nodes, edges, &node);
3586    if let Some(handler) = handler {
3587        refs.push(unresolved(
3588            &node.id,
3589            handler,
3590            EdgeKind::References,
3591            file_path,
3592            Language::MoonBit,
3593            line,
3594        ));
3595    }
3596    nodes.push(node);
3597}
3598
3599fn add_framework_route_node(
3600    file_path: &str,
3601    language: Language,
3602    now: i64,
3603    nodes: &mut Vec<Node>,
3604    edges: &mut Vec<Edge>,
3605    refs: &mut Vec<UnresolvedReference>,
3606    method: &str,
3607    route_path: &str,
3608    handler: Option<&str>,
3609    line: i64,
3610    signature: Option<String>,
3611    provenance: &str,
3612) {
3613    let route_path = normalize_route_path(route_path);
3614    let method = method.to_ascii_uppercase();
3615    let name = format!("{method} {route_path}");
3616    let mut node = make_node(
3617        file_path,
3618        language,
3619        NodeKind::Route,
3620        &name,
3621        line,
3622        0,
3623        now,
3624        signature.or_else(|| handler.map(|h| format!("{method} {route_path} -> {h}"))),
3625    );
3626    node.id = format!("route:{file_path}:{line}:{method}:{route_path}");
3627    node.qualified_name = format!("{file_path}::route:{method}:{route_path}");
3628    add_contains(nodes, edges, &node);
3629    if let Some(edge) = edges.last_mut() {
3630        edge.provenance = Some(provenance.to_string());
3631    }
3632    if let Some(handler) = handler {
3633        refs_push(
3634            refs,
3635            &node.id,
3636            handler.trim(),
3637            EdgeKind::References,
3638            file_path,
3639            language,
3640            line,
3641            0,
3642        );
3643    }
3644    nodes.push(node);
3645}
3646
3647fn extract_web_file_routes(
3648    file_path: &str,
3649    language: Language,
3650    now: i64,
3651    nodes: &mut Vec<Node>,
3652    edges: &mut Vec<Edge>,
3653    refs: &mut Vec<UnresolvedReference>,
3654) {
3655    if let Some(route_path) = next_app_api_route_path(file_path) {
3656        for method in route_exported_methods(nodes) {
3657            add_framework_route_node(
3658                file_path,
3659                language,
3660                now,
3661                nodes,
3662                edges,
3663                refs,
3664                &method,
3665                &route_path,
3666                Some(&method),
3667                1,
3668                Some(format!("{method} {route_path}")),
3669                "file-route",
3670            );
3671        }
3672        return;
3673    }
3674
3675    if let Some(route_path) = file_based_page_route_path(file_path) {
3676        let handler = default_route_handler(nodes).map(str::to_string);
3677        add_framework_route_node(
3678            file_path,
3679            language,
3680            now,
3681            nodes,
3682            edges,
3683            refs,
3684            "PAGE",
3685            &route_path,
3686            handler.as_deref(),
3687            1,
3688            Some(format!("PAGE {route_path}")),
3689            "file-route",
3690        );
3691    }
3692}
3693
3694fn route_exported_methods(nodes: &[Node]) -> Vec<String> {
3695    let mut methods: Vec<String> = nodes
3696        .iter()
3697        .filter(|node| node.kind == NodeKind::Function && node.is_exported)
3698        .filter(|node| {
3699            matches!(
3700                node.name.as_str(),
3701                "GET" | "POST" | "PUT" | "PATCH" | "DELETE"
3702            )
3703        })
3704        .map(|node| node.name.clone())
3705        .collect();
3706    methods.sort();
3707    methods.dedup();
3708    methods
3709}
3710
3711fn default_route_handler(nodes: &[Node]) -> Option<&str> {
3712    nodes
3713        .iter()
3714        .find(|node| node.kind == NodeKind::Function && node.name == "default")
3715        .map(|node| node.name.as_str())
3716}
3717
3718fn next_app_api_route_path(file_path: &str) -> Option<String> {
3719    let path = file_path.strip_prefix("src/").unwrap_or(file_path);
3720    let route = path
3721        .strip_prefix("app/")
3722        .and_then(|p| p.strip_suffix("/route.ts"))
3723        .or_else(|| {
3724            path.strip_prefix("app/")
3725                .and_then(|p| p.strip_suffix("/route.js"))
3726        })?;
3727    Some(file_route_segments_to_path(route))
3728}
3729
3730fn file_based_page_route_path(file_path: &str) -> Option<String> {
3731    let path = file_path.strip_prefix("src/").unwrap_or(file_path);
3732    let route = path
3733        .strip_prefix("pages/")
3734        .and_then(strip_page_extension)
3735        .or_else(|| path.strip_prefix("routes/").and_then(strip_page_extension))?;
3736    Some(file_route_segments_to_path(route))
3737}
3738
/// Removes a recognised page-file extension from `path`.
///
/// Extensions are tried in the order `.tsx`, `.jsx`, `.ts`, `.js`, `.svelte`, `.vue`; the
/// first one that `path` ends with is stripped. Returns `None` when none of them matches.
fn strip_page_extension(path: &str) -> Option<&str> {
    const PAGE_EXTENSIONS: [&str; 6] = [".tsx", ".jsx", ".ts", ".js", ".svelte", ".vue"];
    PAGE_EXTENSIONS
        .iter()
        .find_map(|extension| path.strip_suffix(*extension))
}
3747
3748fn file_route_segments_to_path(route: &str) -> String {
3749    let segments: Vec<String> = route
3750        .split('/')
3751        .filter(|segment| {
3752            !segment.is_empty()
3753                && *segment != "page"
3754                && *segment != "index"
3755                && !(segment.starts_with('(') && segment.ends_with(')'))
3756        })
3757        .map(|segment| {
3758            if segment.starts_with("[...") && segment.ends_with(']') {
3759                format!("*{}", &segment[4..segment.len() - 1])
3760            } else if segment.starts_with('[') && segment.ends_with(']') {
3761                format!(":{}", &segment[1..segment.len() - 1])
3762            } else {
3763                segment.to_string()
3764            }
3765        })
3766        .collect();
3767    normalize_route_path(&segments.join("/"))
3768}
3769
/// Returns the text between the first quote character (`"` or `'`) in `args`
/// and the next occurrence of that same quote character.
///
/// `None` when `args` has no quote or the opening quote is never closed.
/// Backslash escapes are not interpreted.
fn first_quoted_arg(args: &str) -> Option<&str> {
    let (open_at, quote) = args
        .char_indices()
        .find(|&(_, ch)| ch == '"' || ch == '\'')?;
    let after_open = &args[open_at + quote.len_utf8()..];
    let (quoted, _) = after_open.split_once(quote)?;
    Some(quoted)
}
3777
/// Translates a route helper name into the method label recorded for it.
///
/// `api_*` helpers map to the bare HTTP verb, `raw_*` helpers to the verb
/// prefixed with `RAW `; every other name (including `route` and `page`)
/// maps to `PAGE`.
fn helper_route_method(helper: &str) -> &'static str {
    const HELPER_METHODS: [(&str, &str); 10] = [
        ("api_get", "GET"),
        ("api_post", "POST"),
        ("api_put", "PUT"),
        ("api_delete", "DELETE"),
        ("api_patch", "PATCH"),
        ("raw_get", "RAW GET"),
        ("raw_post", "RAW POST"),
        ("raw_put", "RAW PUT"),
        ("raw_delete", "RAW DELETE"),
        ("raw_patch", "RAW PATCH"),
    ];
    HELPER_METHODS
        .iter()
        .find(|(name, _)| *name == helper)
        .map_or("PAGE", |(_, method)| *method)
}
3794
/// Translates a route constructor variant name into its method label:
/// `RawGet`/`RawPost`/`RawPut`/`RawDelete`/`RawPatch` map to `RAW <VERB>`,
/// anything else to `PAGE`.
fn constructor_route_method(variant: &str) -> &'static str {
    match variant.strip_prefix("Raw") {
        Some("Get") => "RAW GET",
        Some("Post") => "RAW POST",
        Some("Put") => "RAW PUT",
        Some("Delete") => "RAW DELETE",
        Some("Patch") => "RAW PATCH",
        _ => "PAGE",
    }
}
3805
/// Returns the prefix string of the last entry in `prefix_stack`, or the
/// empty string when the stack is empty.
fn current_route_prefix(prefix_stack: &[(usize, String)]) -> &str {
    match prefix_stack.last() {
        Some((_, prefix)) => prefix.as_str(),
        None => "",
    }
}
3812
3813fn join_route_paths(prefix: &str, path: &str) -> String {
3814    if prefix.is_empty() || prefix == "/" {
3815        return normalize_route_path(path);
3816    }
3817    let path = normalize_route_path(path);
3818    if path == "/" {
3819        return normalize_route_path(prefix);
3820    }
3821    format!(
3822        "{}/{}",
3823        prefix.trim_end_matches('/'),
3824        path.trim_start_matches('/')
3825    )
3826}
3827
/// Normalises a route path: an empty input becomes `/`, backslashes are
/// replaced by forward slashes, and a leading `/` is added when missing.
fn normalize_route_path(path: &str) -> String {
    if path.is_empty() {
        return String::from("/");
    }
    let mut normalized = path.replace('\\', "/");
    if !normalized.starts_with('/') {
        normalized.insert(0, '/');
    }
    normalized
}
3839
/// Reduces a handler expression to its bare name.
///
/// Surrounding whitespace and leading `@` characters are removed, only the
/// part after the last `.` or `:` is kept, and `)` characters are trimmed
/// from both ends of what remains.
fn clean_moonbit_handler(handler: &str) -> String {
    let unprefixed = handler.trim().trim_start_matches('@');
    let last_component = match unprefixed.rfind(['.', ':']) {
        // Both separators are one byte wide, so `at + 1` is a char boundary.
        Some(at) => &unprefixed[at + 1..],
        None => unprefixed,
    };
    last_component.trim_matches(')').to_string()
}
3850
3851fn extract_moonbit_metadata(
3852    file_path: &str,
3853    source: &str,
3854    now: i64,
3855    nodes: &mut Vec<Node>,
3856    edges: &mut Vec<Edge>,
3857    refs: &mut Vec<UnresolvedReference>,
3858) {
3859    let Ok(json) = serde_json::from_str::<serde_json::Value>(source) else {
3860        return;
3861    };
3862    if file_path.ends_with("moon.mod.json") {
3863        if let Some(name) = json.get("name").and_then(|v| v.as_str()) {
3864            let node = make_node(
3865                file_path,
3866                Language::MoonBit,
3867                NodeKind::Module,
3868                name,
3869                1,
3870                0,
3871                now,
3872                Some("moon.mod.json".into()),
3873            );
3874            add_contains(nodes, edges, &node);
3875            nodes.push(node);
3876        }
3877        return;
3878    }
3879
3880    let package_name = json
3881        .get("name")
3882        .and_then(|v| v.as_str())
3883        .or_else(|| file_path.rsplit('/').nth(1))
3884        .unwrap_or("moonbit-package");
3885    let node = make_node(
3886        file_path,
3887        Language::MoonBit,
3888        NodeKind::Module,
3889        package_name,
3890        1,
3891        0,
3892        now,
3893        Some(file_path.rsplit('/').next().unwrap_or("moon.pkg").into()),
3894    );
3895    add_contains(nodes, edges, &node);
3896    let package_node_id = node.id.clone();
3897    nodes.push(node);
3898
3899    if let Some(imports) = json.get("import").or_else(|| json.get("imports")) {
3900        if let Some(obj) = imports.as_object() {
3901            for (alias, value) in obj {
3902                let target = value.as_str().unwrap_or(alias);
3903                let import_node = make_node(
3904                    file_path,
3905                    Language::MoonBit,
3906                    NodeKind::Import,
3907                    alias,
3908                    1,
3909                    0,
3910                    now,
3911                    Some(target.to_string()),
3912                );
3913                add_contains(nodes, edges, &import_node);
3914                refs.push(unresolved(
3915                    &package_node_id,
3916                    alias,
3917                    EdgeKind::Imports,
3918                    file_path,
3919                    Language::MoonBit,
3920                    1,
3921                ));
3922                nodes.push(import_node);
3923            }
3924        }
3925    }
3926}
3927
3928fn try_extract_rust_tree_sitter(
3929    file_path: &str,
3930    source: &str,
3931    now: i64,
3932    nodes: &mut Vec<Node>,
3933    edges: &mut Vec<Edge>,
3934    refs: &mut Vec<UnresolvedReference>,
3935) -> bool {
3936    let mut parser = Parser::new();
3937    if parser
3938        .set_language(&tree_sitter_rust::LANGUAGE.into())
3939        .is_err()
3940    {
3941        return false;
3942    }
3943    let Some(tree) = parser.parse(source, None) else {
3944        return false;
3945    };
3946    if tree.root_node().has_error() {
3947        return false;
3948    }
3949
3950    let root = tree.root_node();
3951    let mut stack = Vec::new();
3952    collect_rust_nodes(file_path, source, root, now, nodes, edges, refs, &mut stack);
3953    collect_rust_refs(file_path, source, root, nodes, refs);
3954    true
3955}
3956
3957fn collect_rust_nodes(
3958    file_path: &str,
3959    source: &str,
3960    node: SyntaxNode,
3961    now: i64,
3962    nodes: &mut Vec<Node>,
3963    edges: &mut Vec<Edge>,
3964    refs: &mut Vec<UnresolvedReference>,
3965    stack: &mut Vec<String>,
3966) {
3967    let kind = match node.kind() {
3968        "function_item" => {
3969            if rust_receiver_type(node, source).is_some() {
3970                Some(NodeKind::Method)
3971            } else {
3972                Some(NodeKind::Function)
3973            }
3974        }
3975        "struct_item" => Some(NodeKind::Struct),
3976        "trait_item" => Some(NodeKind::Trait),
3977        "enum_item" => Some(NodeKind::Enum),
3978        "enum_variant" => Some(NodeKind::EnumMember),
3979        "type_item" => Some(NodeKind::TypeAlias),
3980        "const_item" => Some(NodeKind::Constant),
3981        "static_item" => Some(NodeKind::Variable),
3982        "let_declaration" => Some(NodeKind::Variable),
3983        "field_declaration" => Some(NodeKind::Field),
3984        "function_signature_item" => Some(NodeKind::Method),
3985        "use_declaration" => Some(NodeKind::Import),
3986        "mod_item" => Some(NodeKind::Module),
3987        _ => None,
3988    };
3989
3990    let mut pushed = false;
3991    if let Some(kind) = kind {
3992        if let Some(name) = rust_node_name(node, source, kind) {
3993            let signature = Some(
3994                node_text(node, source)
3995                    .lines()
3996                    .next()
3997                    .unwrap_or("")
3998                    .trim()
3999                    .to_string(),
4000            );
4001            let mut out =
4002                make_node_span(file_path, Language::Rust, kind, &name, node, now, signature);
4003            out.is_exported = rust_is_public(node, source);
4004            out.visibility = if out.is_exported {
4005                Some("public".into())
4006            } else if matches!(
4007                kind,
4008                NodeKind::Function
4009                    | NodeKind::Method
4010                    | NodeKind::Struct
4011                    | NodeKind::Trait
4012                    | NodeKind::Enum
4013                    | NodeKind::TypeAlias
4014            ) {
4015                Some("private".into())
4016            } else {
4017                None
4018            };
4019            out.is_async = node_text(node, source).trim_start().starts_with("async ")
4020                || node_text(node, source).contains(" async fn ");
4021            if kind == NodeKind::Method {
4022                if let Some(owner) = rust_receiver_type(node, source) {
4023                    out.qualified_name = format!("{owner}::{name}");
4024                }
4025            }
4026            add_contains_from_stack(nodes, edges, stack, &out, "tree-sitter");
4027            let id = out.id.clone();
4028            nodes.push(out);
4029            if matches!(
4030                kind,
4031                NodeKind::Struct
4032                    | NodeKind::Trait
4033                    | NodeKind::Enum
4034                    | NodeKind::Module
4035                    | NodeKind::Function
4036                    | NodeKind::Method
4037            ) {
4038                stack.push(id);
4039                pushed = true;
4040            }
4041        }
4042    }
4043
4044    if node.kind() == "impl_item" {
4045        if let Some((trait_name, type_name)) = rust_impl_trait_for_type(node, source) {
4046            if let Some(type_node) = nodes.iter().find(|n| {
4047                n.name == type_name
4048                    && matches!(n.kind, NodeKind::Struct | NodeKind::Enum | NodeKind::Trait)
4049            }) {
4050                refs_push(
4051                    refs,
4052                    &type_node.id,
4053                    &trait_name,
4054                    EdgeKind::Implements,
4055                    file_path,
4056                    Language::Rust,
4057                    node.start_position().row as i64 + 1,
4058                    node.start_position().column as i64,
4059                );
4060            }
4061        }
4062    }
4063
4064    for child in named_children(node) {
4065        collect_rust_nodes(file_path, source, child, now, nodes, edges, refs, stack);
4066    }
4067
4068    if pushed {
4069        stack.pop();
4070    }
4071}
4072
4073fn collect_rust_refs(
4074    file_path: &str,
4075    source: &str,
4076    node: SyntaxNode,
4077    nodes: &[Node],
4078    refs: &mut Vec<UnresolvedReference>,
4079) {
4080    match node.kind() {
4081        "use_declaration" => {
4082            if let Some(name) = rust_import_root(node, source) {
4083                refs_push(
4084                    refs,
4085                    &format!("file:{file_path}"),
4086                    &name,
4087                    EdgeKind::Imports,
4088                    file_path,
4089                    Language::Rust,
4090                    node.start_position().row as i64 + 1,
4091                    node.start_position().column as i64,
4092                );
4093            }
4094        }
4095        "call_expression" => {
4096            if let Some(function) = node.child_by_field_name("function") {
4097                if let Some(name) = callable_name(function, source) {
4098                    if let Some(caller) =
4099                        enclosing_callable(nodes, node.start_position().row as i64 + 1)
4100                    {
4101                        refs_push(
4102                            refs,
4103                            &caller.id,
4104                            &name,
4105                            EdgeKind::Calls,
4106                            file_path,
4107                            Language::Rust,
4108                            node.start_position().row as i64 + 1,
4109                            node.start_position().column as i64,
4110                        );
4111                    }
4112                }
4113            }
4114        }
4115        _ => {}
4116    }
4117
4118    for child in named_children(node) {
4119        collect_rust_refs(file_path, source, child, nodes, refs);
4120    }
4121}
4122
4123fn try_extract_moonbit_tree_sitter(
4124    file_path: &str,
4125    source: &str,
4126    now: i64,
4127    nodes: &mut Vec<Node>,
4128    edges: &mut Vec<Edge>,
4129    refs: &mut Vec<UnresolvedReference>,
4130) -> bool {
4131    let mut parser = Parser::new();
4132    if parser
4133        .set_language(&tree_sitter_moonbit::LANGUAGE.into())
4134        .is_err()
4135    {
4136        return false;
4137    }
4138    let Some(tree) = parser.parse(source, None) else {
4139        return false;
4140    };
4141    if tree.root_node().has_error() {
4142        return false;
4143    }
4144
4145    let root = tree.root_node();
4146    let mut stack = Vec::new();
4147    collect_moonbit_nodes(file_path, source, root, now, nodes, edges, &mut stack);
4148    collect_moonbit_refs(file_path, source, root, nodes, refs);
4149    true
4150}
4151
4152fn collect_moonbit_nodes(
4153    file_path: &str,
4154    source: &str,
4155    node: SyntaxNode,
4156    now: i64,
4157    nodes: &mut Vec<Node>,
4158    edges: &mut Vec<Edge>,
4159    stack: &mut Vec<String>,
4160) {
4161    let kind = match node.kind() {
4162        "function_definition" => Some(NodeKind::Function),
4163        "impl_definition" => Some(NodeKind::Method),
4164        "struct_definition" | "tuple_struct_definition" => Some(NodeKind::Struct),
4165        "trait_definition" => Some(NodeKind::Trait),
4166        "trait_method_declaration" => Some(NodeKind::Method),
4167        "enum_definition" => Some(NodeKind::Enum),
4168        "enum_constructor" => Some(NodeKind::EnumMember),
4169        "type_alias_definition" | "type_definition" => Some(NodeKind::TypeAlias),
4170        "const_definition" => Some(NodeKind::Constant),
4171        "import_declaration" => Some(NodeKind::Import),
4172        "package_declaration" => Some(NodeKind::Module),
4173        _ => None,
4174    };
4175
4176    let mut pushed = false;
4177    if let Some(kind) = kind {
4178        if let Some(name) = moonbit_node_name(node, source, kind) {
4179            let signature = Some(
4180                node_text(node, source)
4181                    .lines()
4182                    .next()
4183                    .unwrap_or("")
4184                    .trim()
4185                    .to_string(),
4186            );
4187            let mut out = make_node_span(
4188                file_path,
4189                Language::MoonBit,
4190                kind,
4191                &name,
4192                node,
4193                now,
4194                signature,
4195            );
4196            out.is_exported = moonbit_is_public(node, source);
4197            out.visibility = if out.is_exported {
4198                Some("public".into())
4199            } else {
4200                None
4201            };
4202            if kind == NodeKind::Method {
4203                if let Some(owner) = moonbit_impl_owner(node, source) {
4204                    out.qualified_name = format!("{owner}::{name}");
4205                }
4206            }
4207            add_contains_from_stack(nodes, edges, stack, &out, "tree-sitter");
4208            let id = out.id.clone();
4209            nodes.push(out);
4210            if matches!(
4211                kind,
4212                NodeKind::Struct
4213                    | NodeKind::Trait
4214                    | NodeKind::Enum
4215                    | NodeKind::Module
4216                    | NodeKind::Function
4217                    | NodeKind::Method
4218            ) {
4219                stack.push(id);
4220                pushed = true;
4221            }
4222        }
4223    }
4224
4225    for child in named_children(node) {
4226        collect_moonbit_nodes(file_path, source, child, now, nodes, edges, stack);
4227    }
4228
4229    if pushed {
4230        stack.pop();
4231    }
4232}
4233
4234fn collect_moonbit_refs(
4235    file_path: &str,
4236    source: &str,
4237    node: SyntaxNode,
4238    nodes: &[Node],
4239    refs: &mut Vec<UnresolvedReference>,
4240) {
4241    match node.kind() {
4242        "import_declaration" => {
4243            for child in named_children(node) {
4244                if child.kind() == "import_item" {
4245                    if let Some(name) = moonbit_import_name(child, source) {
4246                        refs_push(
4247                            refs,
4248                            &format!("file:{file_path}"),
4249                            &name,
4250                            EdgeKind::Imports,
4251                            file_path,
4252                            Language::MoonBit,
4253                            child.start_position().row as i64 + 1,
4254                            child.start_position().column as i64,
4255                        );
4256                    }
4257                }
4258            }
4259        }
4260        "apply_expression" | "dot_apply_expression" | "dot_dot_apply_expression" => {
4261            if let Some(name) = moonbit_call_name(node, source) {
4262                if let Some(caller) =
4263                    enclosing_callable(nodes, node.start_position().row as i64 + 1)
4264                {
4265                    refs_push(
4266                        refs,
4267                        &caller.id,
4268                        &name,
4269                        EdgeKind::Calls,
4270                        file_path,
4271                        Language::MoonBit,
4272                        node.start_position().row as i64 + 1,
4273                        node.start_position().column as i64,
4274                    );
4275                }
4276            }
4277        }
4278        _ => {}
4279    }
4280
4281    for child in named_children(node) {
4282        collect_moonbit_refs(file_path, source, child, nodes, refs);
4283    }
4284}
4285
4286fn extract_liquid_vue_svelte(
4287    file_path: &str,
4288    source: &str,
4289    language: Language,
4290    now: i64,
4291    nodes: &mut Vec<Node>,
4292    edges: &mut Vec<Edge>,
4293    refs: &mut Vec<UnresolvedReference>,
4294) {
4295    match language {
4296        Language::Liquid => extract_liquid(file_path, source, now, nodes, edges, refs),
4297        Language::Vue | Language::Svelte => {
4298            extract_component_file(file_path, source, language, now, nodes, edges);
4299            extract_component_script_symbols(file_path, source, language, now, nodes, edges, refs);
4300            extract_web_file_routes(file_path, language, now, nodes, edges, refs);
4301            match language {
4302                Language::Vue => extract_vue_template_components(file_path, source, language, refs),
4303                Language::Svelte => extract_svelte_template_refs(file_path, source, language, refs),
4304                _ => {}
4305            }
4306        }
4307        _ => {}
4308    }
4309}
4310
4311fn extract_liquid(
4312    file_path: &str,
4313    source: &str,
4314    now: i64,
4315    nodes: &mut Vec<Node>,
4316    edges: &mut Vec<Edge>,
4317    refs: &mut Vec<UnresolvedReference>,
4318) {
4319    let snippet_re = Regex::new(r#"\{%-?\s*(render|include)\s+['"]([^'"]+)['"]"#).unwrap();
4320    for cap in snippet_re.captures_iter(source) {
4321        let full = cap.get(0).unwrap();
4322        let tag = cap.get(1).unwrap().as_str();
4323        let name = cap.get(2).unwrap();
4324        let line = line_for(source, full.start());
4325        add_liquid_reference_node(
4326            file_path,
4327            now,
4328            nodes,
4329            edges,
4330            refs,
4331            name.as_str(),
4332            &format!("{}:{}", tag, name.as_str()),
4333            &format!("snippets/{}.liquid", name.as_str()),
4334            line,
4335            full.as_str(),
4336        );
4337    }
4338
4339    let section_re = Regex::new(r#"\{%-?\s*section\s+['"]([^'"]+)['"]"#).unwrap();
4340    for cap in section_re.captures_iter(source) {
4341        let full = cap.get(0).unwrap();
4342        let name = cap.get(1).unwrap();
4343        let line = line_for(source, full.start());
4344        add_liquid_reference_node(
4345            file_path,
4346            now,
4347            nodes,
4348            edges,
4349            refs,
4350            name.as_str(),
4351            &format!("section:{}", name.as_str()),
4352            &format!("sections/{}.liquid", name.as_str()),
4353            line,
4354            full.as_str(),
4355        );
4356    }
4357
4358    let schema_re =
4359        Regex::new(r"(?s)\{%-?\s*schema\s*-?%\}(.*?)\{%-?\s*endschema\s*-?%\}").unwrap();
4360    for cap in schema_re.captures_iter(source) {
4361        let full = cap.get(0).unwrap();
4362        let body = cap.get(1).map(|m| m.as_str()).unwrap_or_default();
4363        let line = line_for(source, full.start());
4364        let mut node = make_node(
4365            file_path,
4366            Language::Liquid,
4367            NodeKind::Constant,
4368            "schema",
4369            line,
4370            0,
4371            now,
4372            Some(
4373                full.as_str()
4374                    .lines()
4375                    .next()
4376                    .unwrap_or("{% schema %}")
4377                    .trim()
4378                    .to_string(),
4379            ),
4380        );
4381        node.qualified_name = format!("{}::schema", file_path);
4382        node.docstring = Some(body.trim().chars().take(200).collect());
4383        add_contains(nodes, edges, &node);
4384        nodes.push(node);
4385    }
4386
4387    let assign_re = Regex::new(r"\{%-?\s*assign\s+([A-Za-z_][A-Za-z0-9_]*)\s*=").unwrap();
4388    for cap in assign_re.captures_iter(source) {
4389        let full = cap.get(0).unwrap();
4390        let name = cap.get(1).unwrap();
4391        let mut node = make_node(
4392            file_path,
4393            Language::Liquid,
4394            NodeKind::Variable,
4395            name.as_str(),
4396            line_for(source, name.start()),
4397            0,
4398            now,
4399            Some(full.as_str().trim().to_string()),
4400        );
4401        node.qualified_name = format!("{}::{}", file_path, name.as_str());
4402        add_contains(nodes, edges, &node);
4403        nodes.push(node);
4404    }
4405}
4406
4407fn add_liquid_reference_node(
4408    file_path: &str,
4409    now: i64,
4410    nodes: &mut Vec<Node>,
4411    edges: &mut Vec<Edge>,
4412    refs: &mut Vec<UnresolvedReference>,
4413    name: &str,
4414    qualified_suffix: &str,
4415    reference_name: &str,
4416    line: i64,
4417    signature: &str,
4418) {
4419    let import_node = make_node(
4420        file_path,
4421        Language::Liquid,
4422        NodeKind::Import,
4423        name,
4424        line,
4425        0,
4426        now,
4427        Some(signature.trim().to_string()),
4428    );
4429    add_contains(nodes, edges, &import_node);
4430    nodes.push(import_node);
4431
4432    let mut component_node = make_node(
4433        file_path,
4434        Language::Liquid,
4435        NodeKind::Component,
4436        name,
4437        line,
4438        0,
4439        now,
4440        Some(signature.trim().to_string()),
4441    );
4442    component_node.qualified_name = format!("{}::{}", file_path, qualified_suffix);
4443    add_contains(nodes, edges, &component_node);
4444    nodes.push(component_node);
4445
4446    refs.push(unresolved(
4447        &nodes[0].id,
4448        reference_name,
4449        EdgeKind::References,
4450        file_path,
4451        Language::Liquid,
4452        line,
4453    ));
4454}
4455
4456fn extract_component_file(
4457    file_path: &str,
4458    source: &str,
4459    language: Language,
4460    now: i64,
4461    nodes: &mut Vec<Node>,
4462    edges: &mut Vec<Edge>,
4463) {
4464    let name = component_name_from_path(file_path, language);
4465    let mut node = make_node(
4466        file_path,
4467        language,
4468        NodeKind::Component,
4469        &name,
4470        1,
4471        0,
4472        now,
4473        None,
4474    );
4475    node.qualified_name = format!("{}::{}", file_path, name);
4476    node.end_line = source.lines().count().max(1) as i64;
4477    node.is_exported = true;
4478    node.visibility = Some("public".to_string());
4479    add_contains(nodes, edges, &node);
4480    nodes.push(node);
4481}
4482
4483fn component_name_from_path(file_path: &str, language: Language) -> String {
4484    let file_name = file_path.rsplit('/').next().unwrap_or(file_path);
4485    match language {
4486        Language::Vue => file_name.strip_suffix(".vue").unwrap_or(file_name),
4487        Language::Svelte => file_name.strip_suffix(".svelte").unwrap_or(file_name),
4488        _ => file_name,
4489    }
4490    .to_string()
4491}
4492
4493fn extract_component_script_symbols(
4494    file_path: &str,
4495    source: &str,
4496    language: Language,
4497    now: i64,
4498    nodes: &mut Vec<Node>,
4499    edges: &mut Vec<Edge>,
4500    refs: &mut Vec<UnresolvedReference>,
4501) {
4502    for block in script_blocks(source) {
4503        let before_nodes = nodes.len();
4504        let before_refs = refs.len();
4505        extract_typescript_javascript(file_path, &block.content, language, now, nodes, edges, refs);
4506        for node in nodes.iter_mut().skip(before_nodes) {
4507            node.start_line += block.start_line - 1;
4508            node.end_line += block.start_line - 1;
4509        }
4510        for reference in refs.iter_mut().skip(before_refs) {
4511            reference.line += block.start_line - 1;
4512        }
4513    }
4514}
4515
/// The body of one `<script>` element found in a component file.
struct ScriptBlock {
    /// Line of the enclosing file on which `content` begins.
    start_line: i64,
    /// Text between the opening and closing script tags.
    content: String,
}
4520
4521fn script_blocks(source: &str) -> Vec<ScriptBlock> {
4522    let re = Regex::new(r"(?is)<script(?:\s[^>]*)?>(.*?)</script>").unwrap();
4523    re.captures_iter(source)
4524        .filter_map(|cap| {
4525            let content = cap.get(1)?;
4526            Some(ScriptBlock {
4527                content: content.as_str().to_string(),
4528                start_line: line_for(source, content.start()),
4529            })
4530        })
4531        .collect()
4532}
4533
4534fn extract_vue_template_components(
4535    file_path: &str,
4536    source: &str,
4537    language: Language,
4538    refs: &mut Vec<UnresolvedReference>,
4539) {
4540    let tag_re = Regex::new(r"<([A-Z][A-Za-z0-9_$]*)\b").unwrap();
4541    for cap in tag_re.captures_iter(source) {
4542        let tag = cap.get(1).unwrap();
4543        refs.push(unresolved(
4544            &format!("file:{}", file_path),
4545            tag.as_str(),
4546            EdgeKind::References,
4547            file_path,
4548            language,
4549            line_for(source, tag.start()),
4550        ));
4551    }
4552}
4553
4554fn extract_svelte_template_refs(
4555    file_path: &str,
4556    source: &str,
4557    language: Language,
4558    refs: &mut Vec<UnresolvedReference>,
4559) {
4560    extract_vue_template_components(file_path, source, language, refs);
4561    let expr_re = Regex::new(r"\{([^}#/:@][^}]*)\}").unwrap();
4562    let call_re = Regex::new(r"\b([A-Za-z_$][A-Za-z0-9_$.]*)\s*\(").unwrap();
4563    let runes = [
4564        "$props",
4565        "$state",
4566        "$derived",
4567        "$effect",
4568        "$bindable",
4569        "$inspect",
4570        "$host",
4571        "$snippet",
4572    ];
4573    for expr in expr_re.captures_iter(source) {
4574        let Some(body) = expr.get(1) else {
4575            continue;
4576        };
4577        for call in call_re.captures_iter(body.as_str()) {
4578            let name = call.get(1).unwrap().as_str();
4579            if runes.contains(&name) || matches!(name, "if" | "else" | "each" | "await") {
4580                continue;
4581            }
4582            refs.push(unresolved(
4583                &format!("file:{}", file_path),
4584                name,
4585                EdgeKind::Calls,
4586                file_path,
4587                language,
4588                line_for(source, body.start() + call.get(1).unwrap().start()),
4589            ));
4590        }
4591    }
4592}
4593
4594fn extract_generic(
4595    file_path: &str,
4596    source: &str,
4597    language: Language,
4598    now: i64,
4599    nodes: &mut Vec<Node>,
4600    edges: &mut Vec<Edge>,
4601    refs: &mut Vec<UnresolvedReference>,
4602) {
4603    add_regex_nodes(
4604        file_path,
4605        source,
4606        language,
4607        now,
4608        nodes,
4609        edges,
4610        r"(?m)^\s*(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_$][A-Za-z0-9_$]*)",
4611        NodeKind::Function,
4612    );
4613    add_regex_nodes(
4614        file_path,
4615        source,
4616        language,
4617        now,
4618        nodes,
4619        edges,
4620        r"(?m)^\s*(?:export\s+)?class\s+([A-Za-z_$][A-Za-z0-9_$]*)",
4621        NodeKind::Class,
4622    );
4623    add_call_refs(
4624        file_path,
4625        source,
4626        language,
4627        nodes,
4628        refs,
4629        r"([A-Za-z_$][A-Za-z0-9_$.]*)\s*\(",
4630    );
4631}
4632
4633fn add_regex_nodes(
4634    file_path: &str,
4635    source: &str,
4636    language: Language,
4637    now: i64,
4638    nodes: &mut Vec<Node>,
4639    edges: &mut Vec<Edge>,
4640    pattern: &str,
4641    kind: NodeKind,
4642) {
4643    let re = Regex::new(pattern).unwrap();
4644    for cap in re.captures_iter(source) {
4645        let Some(name_match) = cap.get(2).or_else(|| cap.get(1)) else {
4646            continue;
4647        };
4648        let mut name = name_match.as_str().to_string();
4649        if kind == NodeKind::Method && name.contains("::") {
4650            name = name.rsplit("::").next().unwrap_or(&name).to_string();
4651        }
4652        let signature = cap.get(0).map(|m| m.as_str().trim().to_string());
4653        let line = line_for(source, name_match.start());
4654        let mut node = make_node(file_path, language, kind, &name, line, 0, now, signature);
4655        node.is_exported = cap
4656            .get(1)
4657            .map(|m| m.as_str().contains("pub") || m.as_str().contains("export"))
4658            .unwrap_or(false);
4659        node.visibility = if node.is_exported {
4660            Some("public".into())
4661        } else {
4662            None
4663        };
4664        add_contains(nodes, edges, &node);
4665        nodes.push(node);
4666    }
4667}
4668
4669fn add_call_refs(
4670    file_path: &str,
4671    source: &str,
4672    language: Language,
4673    nodes: &[Node],
4674    refs: &mut Vec<UnresolvedReference>,
4675    pattern: &str,
4676) {
4677    let re = Regex::new(pattern).unwrap();
4678    let keywords = [
4679        "if", "for", "while", "match", "return", "fn", "test", "inspect", "Some", "Ok", "Err",
4680    ];
4681    for cap in re.captures_iter(source) {
4682        let name = cap.get(1).unwrap().as_str().rsplit("::").next().unwrap();
4683        if keywords.contains(&name) {
4684            continue;
4685        }
4686        let line = line_for(source, cap.get(1).unwrap().start());
4687        if let Some(caller) = nodes
4688            .iter()
4689            .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
4690            .rev()
4691            .find(|n| n.start_line <= line)
4692        {
4693            refs.push(unresolved(
4694                &caller.id,
4695                name,
4696                EdgeKind::Calls,
4697                file_path,
4698                language,
4699                line,
4700            ));
4701        }
4702    }
4703}
4704
4705fn make_node(
4706    file_path: &str,
4707    language: Language,
4708    kind: NodeKind,
4709    name: &str,
4710    line: i64,
4711    col: i64,
4712    now: i64,
4713    signature: Option<String>,
4714) -> Node {
4715    Node {
4716        id: format!("{}:{}:{}:{}", kind.as_str(), file_path, name, line),
4717        kind,
4718        name: name.to_string(),
4719        qualified_name: name.to_string(),
4720        file_path: file_path.to_string(),
4721        language,
4722        start_line: line,
4723        end_line: line,
4724        start_column: col,
4725        end_column: col,
4726        docstring: None,
4727        signature,
4728        visibility: None,
4729        is_exported: false,
4730        is_async: false,
4731        is_static: false,
4732        is_abstract: false,
4733        updated_at: now,
4734    }
4735}
4736
4737fn make_node_span(
4738    file_path: &str,
4739    language: Language,
4740    kind: NodeKind,
4741    name: &str,
4742    node: SyntaxNode,
4743    now: i64,
4744    signature: Option<String>,
4745) -> Node {
4746    let start = node.start_position();
4747    let end = node.end_position();
4748    Node {
4749        id: format!("{}:{}:{}:{}", kind.as_str(), file_path, name, start.row + 1),
4750        kind,
4751        name: name.to_string(),
4752        qualified_name: name.to_string(),
4753        file_path: file_path.to_string(),
4754        language,
4755        start_line: start.row as i64 + 1,
4756        end_line: end.row as i64 + 1,
4757        start_column: start.column as i64,
4758        end_column: end.column as i64,
4759        docstring: None,
4760        signature,
4761        visibility: None,
4762        is_exported: false,
4763        is_async: false,
4764        is_static: false,
4765        is_abstract: false,
4766        updated_at: now,
4767    }
4768}
4769
4770fn add_contains(nodes: &[Node], edges: &mut Vec<Edge>, node: &Node) {
4771    if let Some(file) = nodes.first() {
4772        edges.push(Edge {
4773            id: None,
4774            source: file.id.clone(),
4775            target: node.id.clone(),
4776            kind: EdgeKind::Contains,
4777            line: None,
4778            col: None,
4779            provenance: Some("regex".into()),
4780        });
4781    }
4782}
4783
4784fn add_contains_from_stack(
4785    nodes: &[Node],
4786    edges: &mut Vec<Edge>,
4787    stack: &[String],
4788    node: &Node,
4789    provenance: &str,
4790) {
4791    let source = stack
4792        .last()
4793        .cloned()
4794        .or_else(|| nodes.first().map(|n| n.id.clone()));
4795    if let Some(source) = source {
4796        edges.push(Edge {
4797            id: None,
4798            source,
4799            target: node.id.clone(),
4800            kind: EdgeKind::Contains,
4801            line: None,
4802            col: None,
4803            provenance: Some(provenance.into()),
4804        });
4805    }
4806}
4807
4808fn unresolved(
4809    from: &str,
4810    name: &str,
4811    kind: EdgeKind,
4812    file_path: &str,
4813    language: Language,
4814    line: i64,
4815) -> UnresolvedReference {
4816    UnresolvedReference {
4817        from_node_id: from.to_string(),
4818        reference_name: name.to_string(),
4819        reference_kind: kind,
4820        line,
4821        column: 0,
4822        file_path: file_path.to_string(),
4823        language,
4824    }
4825}
4826
4827fn refs_push(
4828    refs: &mut Vec<UnresolvedReference>,
4829    from: &str,
4830    name: &str,
4831    kind: EdgeKind,
4832    file_path: &str,
4833    language: Language,
4834    line: i64,
4835    column: i64,
4836) {
4837    if !name.is_empty() {
4838        refs.push(UnresolvedReference {
4839            from_node_id: from.to_string(),
4840            reference_name: name.to_string(),
4841            reference_kind: kind,
4842            line,
4843            column,
4844            file_path: file_path.to_string(),
4845            language,
4846        });
4847    }
4848}
4849
4850fn named_children(node: SyntaxNode) -> Vec<SyntaxNode> {
4851    (0..node.named_child_count())
4852        .filter_map(|i| node.named_child(i as u32))
4853        .collect()
4854}
4855
4856fn node_text<'a>(node: SyntaxNode, source: &'a str) -> &'a str {
4857    source.get(node.byte_range()).unwrap_or_default()
4858}
4859
4860fn child_text_by_kind<'a>(node: SyntaxNode, source: &'a str, kinds: &[&str]) -> Option<&'a str> {
4861    named_children(node)
4862        .into_iter()
4863        .find(|child| kinds.contains(&child.kind()))
4864        .map(|child| node_text(child, source))
4865}
4866
4867fn descendant_text_by_kind<'a>(
4868    node: SyntaxNode,
4869    source: &'a str,
4870    kinds: &[&str],
4871) -> Option<&'a str> {
4872    if kinds.contains(&node.kind()) {
4873        return Some(node_text(node, source));
4874    }
4875    for child in named_children(node) {
4876        if let Some(text) = descendant_text_by_kind(child, source, kinds) {
4877            return Some(text);
4878        }
4879    }
4880    None
4881}
4882
4883fn rust_node_name(node: SyntaxNode, source: &str, kind: NodeKind) -> Option<String> {
4884    if kind == NodeKind::Import {
4885        return rust_import_root(node, source);
4886    }
4887    if kind == NodeKind::Variable && node.kind() == "let_declaration" {
4888        return descendant_text_by_kind(node, source, &["identifier"]).map(clean_symbol_name);
4889    }
4890    if kind == NodeKind::Field {
4891        return child_text_by_kind(node, source, &["field_identifier", "identifier"])
4892            .map(clean_symbol_name);
4893    }
4894    node.child_by_field_name("name")
4895        .map(|n| clean_symbol_name(node_text(n, source)))
4896        .or_else(|| {
4897            child_text_by_kind(
4898                node,
4899                source,
4900                &["identifier", "type_identifier", "field_identifier"],
4901            )
4902            .map(clean_symbol_name)
4903        })
4904}
4905
4906fn rust_is_public(node: SyntaxNode, source: &str) -> bool {
4907    node_text(node, source).trim_start().starts_with("pub")
4908        || named_children(node).into_iter().any(|child| {
4909            child.kind() == "visibility_modifier" && node_text(child, source).contains("pub")
4910        })
4911}
4912
4913fn rust_receiver_type(node: SyntaxNode, source: &str) -> Option<String> {
4914    let mut parent = node.parent();
4915    while let Some(p) = parent {
4916        if p.kind() == "impl_item" {
4917            let mut direct = named_children(p)
4918                .into_iter()
4919                .filter(|child| {
4920                    matches!(
4921                        child.kind(),
4922                        "type_identifier" | "generic_type" | "scoped_type_identifier"
4923                    )
4924                })
4925                .collect::<Vec<_>>();
4926            if let Some(last) = direct.pop() {
4927                return Some(clean_type_name(node_text(last, source)));
4928            }
4929            return descendant_text_by_kind(p, source, &["type_identifier"]).map(clean_type_name);
4930        }
4931        parent = p.parent();
4932    }
4933    None
4934}
4935
4936fn rust_impl_trait_for_type(node: SyntaxNode, source: &str) -> Option<(String, String)> {
4937    if node.kind() != "impl_item" || !node_text(node, source).contains(" for ") {
4938        return None;
4939    }
4940    let names: Vec<String> = named_children(node)
4941        .into_iter()
4942        .filter(|child| {
4943            matches!(
4944                child.kind(),
4945                "type_identifier" | "generic_type" | "scoped_type_identifier"
4946            )
4947        })
4948        .map(|child| clean_type_name(node_text(child, source)))
4949        .collect();
4950    if names.len() >= 2 {
4951        Some((names[0].clone(), names[names.len() - 1].clone()))
4952    } else {
4953        None
4954    }
4955}
4956
4957fn rust_import_root(node: SyntaxNode, source: &str) -> Option<String> {
4958    let text = node_text(node, source)
4959        .trim()
4960        .strip_prefix("use")
4961        .unwrap_or(node_text(node, source))
4962        .trim()
4963        .trim_end_matches(';')
4964        .trim();
4965    text.split("::")
4966        .next()
4967        .map(|s| s.trim_matches('{').trim().to_string())
4968        .filter(|s| !s.is_empty())
4969}
4970
4971fn callable_name(node: SyntaxNode, source: &str) -> Option<String> {
4972    match node.kind() {
4973        "identifier" | "field_identifier" => Some(clean_symbol_name(node_text(node, source))),
4974        "scoped_identifier" => node_text(node, source)
4975            .rsplit("::")
4976            .next()
4977            .map(clean_symbol_name),
4978        "field_expression" => node
4979            .child_by_field_name("field")
4980            .map(|field| clean_symbol_name(node_text(field, source))),
4981        "generic_function" => named_children(node)
4982            .into_iter()
4983            .find_map(|child| callable_name(child, source)),
4984        _ => None,
4985    }
4986}
4987
4988fn moonbit_node_name(node: SyntaxNode, source: &str, kind: NodeKind) -> Option<String> {
4989    match kind {
4990        NodeKind::Function | NodeKind::Method => child_text_by_kind(
4991            node,
4992            source,
4993            &["function_identifier", "lowercase_identifier", "identifier"],
4994        )
4995        .map(|s| clean_symbol_name(s.rsplit("::").next().unwrap_or(s))),
4996        NodeKind::Struct | NodeKind::Trait | NodeKind::Enum => child_text_by_kind(
4997            node,
4998            source,
4999            &[
5000                "identifier",
5001                "type_identifier",
5002                "type_name",
5003                "uppercase_identifier",
5004            ],
5005        )
5006        .map(clean_symbol_name),
5007        NodeKind::EnumMember => child_text_by_kind(
5008            node,
5009            source,
5010            &["uppercase_identifier", "identifier", "type_name"],
5011        )
5012        .map(clean_symbol_name),
5013        NodeKind::TypeAlias => descendant_text_by_kind(
5014            node,
5015            source,
5016            &[
5017                "type_identifier",
5018                "type_name",
5019                "identifier",
5020                "uppercase_identifier",
5021            ],
5022        )
5023        .map(clean_symbol_name),
5024        NodeKind::Constant => {
5025            child_text_by_kind(node, source, &["uppercase_identifier", "identifier"])
5026                .map(clean_symbol_name)
5027        }
5028        NodeKind::Import => moonbit_import_name(node, source),
5029        NodeKind::Module => node
5030            .named_child(0)
5031            .map(|child| clean_quoted(node_text(child, source))),
5032        _ => None,
5033    }
5034}
5035
5036fn moonbit_is_public(node: SyntaxNode, source: &str) -> bool {
5037    named_children(node)
5038        .into_iter()
5039        .any(|child| child.kind() == "visibility" && node_text(child, source).contains("pub"))
5040        || node_text(node, source).trim_start().starts_with("pub ")
5041}
5042
5043fn moonbit_impl_owner(node: SyntaxNode, source: &str) -> Option<String> {
5044    child_text_by_kind(
5045        node,
5046        source,
5047        &["type_name", "type_identifier", "qualified_type_identifier"],
5048    )
5049    .map(clean_type_name)
5050}
5051
5052fn moonbit_import_name(node: SyntaxNode, source: &str) -> Option<String> {
5053    if node.kind() == "import_declaration" {
5054        return named_children(node)
5055            .into_iter()
5056            .find(|child| child.kind() == "import_item")
5057            .and_then(|child| moonbit_import_name(child, source));
5058    }
5059    named_children(node)
5060        .into_iter()
5061        .find(|child| child.kind() == "string_literal")
5062        .map(|child| clean_quoted(node_text(child, source)))
5063}
5064
5065fn moonbit_call_name(node: SyntaxNode, source: &str) -> Option<String> {
5066    for child in named_children(node) {
5067        match child.kind() {
5068            "qualified_identifier" | "function_identifier" | "method_expression" => {
5069                let text = node_text(child, source);
5070                let name = text
5071                    .rsplit(['.', ':'])
5072                    .find(|part| !part.is_empty())
5073                    .unwrap_or(text);
5074                return Some(clean_symbol_name(name));
5075            }
5076            "lowercase_identifier" | "identifier" => {
5077                return Some(clean_symbol_name(node_text(child, source)));
5078            }
5079            _ => {}
5080        }
5081    }
5082    None
5083}
5084
5085fn enclosing_callable(nodes: &[Node], line: i64) -> Option<&Node> {
5086    nodes
5087        .iter()
5088        .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
5089        .filter(|n| n.start_line <= line && line <= n.end_line.max(n.start_line))
5090        .min_by_key(|n| n.end_line - n.start_line)
5091}
5092
/// Normalises a raw symbol name.
///
/// Steps, in order: trim surrounding whitespace, strip surrounding double quotes, strip
/// surrounding single quotes, then drop any leading `.` characters.
fn clean_symbol_name(s: &str) -> String {
    let without_double = s.trim().trim_matches('"');
    let without_quotes = without_double.trim_matches('\'');
    String::from(without_quotes.trim_start_matches('.'))
}
5100
/// Trims surrounding whitespace from `s`, then strips surrounding double quotes, then
/// surrounding single quotes.
fn clean_quoted(s: &str) -> String {
    let without_double = s.trim().trim_matches('"');
    without_double.trim_matches('\'').to_owned()
}
5104
/// Reduces a type expression to its bare name.
///
/// Everything from the first `<` onward is discarded, then only the segment after the
/// last `::` is kept, with surrounding whitespace trimmed.
fn clean_type_name(s: &str) -> String {
    let trimmed = s.trim();
    let without_generics = match trimmed.find('<') {
        Some(open) => &trimmed[..open],
        None => trimmed,
    };
    let last_segment = match without_generics.rfind("::") {
        Some(sep) => &without_generics[sep + 2..],
        None => without_generics,
    };
    last_segment.trim().to_owned()
}
5115
/// Returns the 1-based line number of byte offset `idx` in `source`.
///
/// The result is one plus the number of `\n` bytes before `idx`. Offsets past the end of
/// `source` are clamped to its length. Newlines are counted on the raw bytes, so an
/// offset that falls inside a multi-byte UTF-8 character is accepted instead of causing a
/// string-slicing panic.
fn line_for(source: &str, idx: usize) -> i64 {
    let end = idx.min(source.len());
    let newlines = source.as_bytes()[..end]
        .iter()
        .filter(|&&byte| byte == b'\n')
        .count();
    newlines as i64 + 1
}
5123
/// Keeps only the contents of ```` ``` ```` fenced blocks whose fence line mentions `mbt`,
/// replacing every other line with an empty line.
///
/// One output line is emitted per input line, each terminated by `\n`, so line numbers in
/// the result match the markdown source. Fence lines themselves are always blanked. A
/// fence line switches extraction on when it contains `mbt` and off otherwise.
fn extract_mbt_markdown_code_with_padding(source: &str) -> String {
    let mut inside_mbt_block = false;
    let mut padded = String::new();
    for raw_line in source.lines() {
        let unindented = raw_line.trim_start();
        if unindented.starts_with("```") {
            // Opening and closing fences are handled alike: only the presence of "mbt"
            // on the fence line decides the new state.
            inside_mbt_block = unindented.contains("mbt");
        } else if inside_mbt_block {
            padded.push_str(raw_line);
        }
        padded.push('\n');
    }
    padded
}
5141
/// Blanks out `//` line comments and `/* ... */` block comments while keeping every
/// newline in place.
///
/// Each comment character (delimiters included) is replaced by one space; newlines inside
/// or ending a comment are preserved. Text inside double-quoted strings is copied
/// verbatim, with backslash escapes honoured so an escaped quote does not end the string.
/// An unterminated comment runs to the end of the input.
fn strip_moonbit_comments_preserve_lines(source: &str) -> String {
    let mut stripped = String::with_capacity(source.len());
    let mut rest = source.chars().peekable();
    let mut inside_string = false;
    let mut after_backslash = false;

    while let Some(current) = rest.next() {
        if inside_string {
            stripped.push(current);
            match current {
                // The character following a backslash is never special.
                _ if after_backslash => after_backslash = false,
                '\\' => after_backslash = true,
                '"' => inside_string = false,
                _ => {}
            }
            continue;
        }

        match (current, rest.peek().copied()) {
            ('"', _) => {
                inside_string = true;
                stripped.push(current);
            }
            ('/', Some('/')) => {
                rest.next();
                stripped.push_str("  ");
                // Blank everything up to the end of the line, keeping the newline itself.
                for commented in rest.by_ref() {
                    if commented == '\n' {
                        stripped.push('\n');
                        break;
                    }
                    stripped.push(' ');
                }
            }
            ('/', Some('*')) => {
                rest.next();
                stripped.push_str("  ");
                // `previous` starts as NUL so the opening `*` cannot pair with a `/`
                // that immediately follows it.
                let mut previous = '\0';
                for commented in rest.by_ref() {
                    stripped.push(if commented == '\n' { '\n' } else { ' ' });
                    if previous == '*' && commented == '/' {
                        break;
                    }
                    previous = commented;
                }
            }
            _ => stripped.push(current),
        }
    }
    stripped
}
5203
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields `0` if the system clock reports a time before the epoch.
fn now_ms() -> i64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as i64,
        Err(_) => 0,
    }
}