Skip to main content

codegraph/
extraction.rs

1use crate::config::CodeGraphConfig;
2use crate::types::*;
3use regex::Regex;
4use std::path::Path;
5use tree_sitter::{Node as SyntaxNode, Parser};
6
7pub fn should_include_file(path: &Path, config: &CodeGraphConfig) -> bool {
8    let s = path.to_string_lossy().replace('\\', "/");
9    if s.starts_with(".codegraph/") {
10        return false;
11    }
12    if config.exclude.iter().any(|p| glob_match(p, &s)) {
13        return false;
14    }
15    config.include.iter().any(|p| glob_match(p, &s))
16}
17
/// Minimal glob matcher for the pattern shapes used by the include/exclude lists.
///
/// Supported forms, checked in this order:
/// - `**/*.ext`  — any path ending in `.ext`
/// - `**/dir/**` — `dir` itself, or any path that has `dir` as a whole directory segment
/// - `**/name`   — `name` itself, or any path whose trailing segment(s) equal `name`
/// - anything else — exact string equality
///
/// `path` is expected to use `/` as its separator.
fn glob_match(pattern: &str, path: &str) -> bool {
    if let Some(ext) = pattern.strip_prefix("**/*.") {
        return path.ends_with(&format!(".{ext}"));
    }

    let directory = pattern
        .strip_prefix("**/")
        .and_then(|rest| rest.strip_suffix("/**"));
    if let Some(dir) = directory {
        // Compare whole segments so `**/build/**` does not swallow `rebuild/...`.
        return path == dir
            || path.starts_with(&format!("{dir}/"))
            || path.contains(&format!("/{dir}/"));
    }

    if let Some(tail) = pattern.strip_prefix("**/") {
        // Same segment rule for the tail: `**/Cargo.lock` must not match `NotCargo.lock`.
        return path == tail || path.ends_with(&format!("/{tail}"));
    }

    pattern == path
}
34
35pub fn detect_language(path: &Path, _source: &str) -> Language {
36    let name = path
37        .file_name()
38        .and_then(|s| s.to_str())
39        .unwrap_or_default()
40        .to_lowercase();
41    if name == "moon.mod.json" || name == "moon.pkg.json" || name == "moon.pkg" {
42        return Language::MoonBit;
43    }
44    if name.ends_with(".mbt.md") {
45        return Language::MoonBit;
46    }
47    match path
48        .extension()
49        .and_then(|s| s.to_str())
50        .unwrap_or_default()
51        .to_lowercase()
52        .as_str()
53    {
54        "ts" => Language::TypeScript,
55        "tsx" => Language::Tsx,
56        "js" | "mjs" | "cjs" => Language::JavaScript,
57        "jsx" => Language::Jsx,
58        "py" | "pyw" => Language::Python,
59        "go" => Language::Go,
60        "rs" => Language::Rust,
61        "java" => Language::Java,
62        "c" | "h" => Language::C,
63        "cpp" | "cc" | "cxx" | "hpp" | "hxx" => Language::Cpp,
64        "cs" => Language::CSharp,
65        "php" => Language::Php,
66        "rb" | "rake" => Language::Ruby,
67        "swift" => Language::Swift,
68        "kt" | "kts" => Language::Kotlin,
69        "dart" => Language::Dart,
70        "svelte" => Language::Svelte,
71        "vue" => Language::Vue,
72        "liquid" => Language::Liquid,
73        "pas" | "dpr" | "dpk" | "lpr" | "dfm" | "fmx" => Language::Pascal,
74        "scala" | "sc" => Language::Scala,
75        "mbt" | "mbti" => Language::MoonBit,
76        _ => Language::Unknown,
77    }
78}
79
80pub fn extract_from_source(path: &Path, source: &str, language: Language) -> ExtractionResult {
81    let file_path = path.to_string_lossy().replace('\\', "/");
82    let now = now_ms();
83    let mut nodes = vec![Node {
84        id: format!("file:{}", file_path),
85        kind: NodeKind::File,
86        name: path
87            .file_name()
88            .and_then(|s| s.to_str())
89            .unwrap_or(&file_path)
90            .to_string(),
91        qualified_name: file_path.clone(),
92        file_path: file_path.clone(),
93        language,
94        start_line: 1,
95        end_line: source.lines().count().max(1) as i64,
96        start_column: 0,
97        end_column: 0,
98        docstring: None,
99        signature: None,
100        visibility: None,
101        is_exported: false,
102        is_async: false,
103        is_static: false,
104        is_abstract: false,
105        updated_at: now,
106    }];
107    let mut edges = Vec::new();
108    let mut refs = Vec::new();
109
110    match language {
111        Language::Rust => extract_rust(&file_path, source, now, &mut nodes, &mut edges, &mut refs),
112        Language::MoonBit => {
113            extract_moonbit(&file_path, source, now, &mut nodes, &mut edges, &mut refs)
114        }
115        _ => extract_generic(
116            &file_path, source, language, now, &mut nodes, &mut edges, &mut refs,
117        ),
118    }
119
120    ExtractionResult {
121        nodes,
122        edges,
123        unresolved_references: refs,
124    }
125}
126
127fn extract_rust(
128    file_path: &str,
129    source: &str,
130    now: i64,
131    nodes: &mut Vec<Node>,
132    edges: &mut Vec<Edge>,
133    refs: &mut Vec<UnresolvedReference>,
134) {
135    if try_extract_rust_tree_sitter(file_path, source, now, nodes, edges, refs) {
136        return;
137    }
138
139    add_regex_nodes(
140        file_path,
141        source,
142        Language::Rust,
143        now,
144        nodes,
145        edges,
146        r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\s*([^{;]*)",
147        NodeKind::Function,
148    );
149    add_regex_nodes(
150        file_path,
151        source,
152        Language::Rust,
153        now,
154        nodes,
155        edges,
156        r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?struct\s+([A-Za-z_][A-Za-z0-9_]*)",
157        NodeKind::Struct,
158    );
159    add_regex_nodes(
160        file_path,
161        source,
162        Language::Rust,
163        now,
164        nodes,
165        edges,
166        r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?trait\s+([A-Za-z_][A-Za-z0-9_]*)",
167        NodeKind::Trait,
168    );
169    add_regex_nodes(
170        file_path,
171        source,
172        Language::Rust,
173        now,
174        nodes,
175        edges,
176        r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?enum\s+([A-Za-z_][A-Za-z0-9_]*)",
177        NodeKind::Enum,
178    );
179    add_regex_nodes(
180        file_path,
181        source,
182        Language::Rust,
183        now,
184        nodes,
185        edges,
186        r"(?m)^\s*(pub(?:\([^)]*\))?\s+)?type\s+([A-Za-z_][A-Za-z0-9_]*)",
187        NodeKind::TypeAlias,
188    );
189
190    let use_re = Regex::new(r"(?m)^\s*use\s+([^;]+);").unwrap();
191    for cap in use_re.captures_iter(source) {
192        let full = cap.get(1).unwrap();
193        let root = full
194            .as_str()
195            .split("::")
196            .next()
197            .unwrap_or(full.as_str())
198            .trim_matches('{')
199            .trim();
200        let node = make_node(
201            file_path,
202            Language::Rust,
203            NodeKind::Import,
204            root,
205            line_for(source, full.start()),
206            0,
207            now,
208            Some(format!("use {};", full.as_str())),
209        );
210        add_contains(nodes, edges, &node);
211        refs.push(unresolved(
212            &nodes[0].id,
213            root,
214            EdgeKind::Imports,
215            file_path,
216            Language::Rust,
217            node.start_line,
218        ));
219        nodes.push(node);
220    }
221
222    let impl_re = Regex::new(
223        r"(?m)^\s*impl(?:<[^>]+>)?\s+([A-Za-z_][A-Za-z0-9_:]*)\s+for\s+([A-Za-z_][A-Za-z0-9_]*)",
224    )
225    .unwrap();
226    for cap in impl_re.captures_iter(source) {
227        let trait_name = cap.get(1).unwrap().as_str().rsplit("::").next().unwrap();
228        let type_name = cap.get(2).unwrap().as_str();
229        if let Some(src) = nodes
230            .iter()
231            .find(|n| n.name == type_name && matches!(n.kind, NodeKind::Struct | NodeKind::Enum))
232            .map(|n| n.id.clone())
233        {
234            refs.push(unresolved(
235                &src,
236                trait_name,
237                EdgeKind::Implements,
238                file_path,
239                Language::Rust,
240                line_for(source, cap.get(1).unwrap().start()),
241            ));
242        }
243    }
244    add_call_refs(
245        file_path,
246        source,
247        Language::Rust,
248        nodes,
249        refs,
250        r"([A-Za-z_][A-Za-z0-9_:]*)\s*\(",
251    );
252}
253
254fn extract_moonbit(
255    file_path: &str,
256    source: &str,
257    now: i64,
258    nodes: &mut Vec<Node>,
259    edges: &mut Vec<Edge>,
260    refs: &mut Vec<UnresolvedReference>,
261) {
262    if file_path.ends_with("moon.mod.json")
263        || file_path.ends_with("moon.pkg.json")
264        || file_path.ends_with("moon.pkg")
265    {
266        extract_moonbit_metadata(file_path, source, now, nodes, edges, refs);
267        return;
268    }
269
270    let source = if file_path.ends_with(".mbt.md") {
271        extract_mbt_markdown_code_with_padding(source)
272    } else {
273        source.to_string()
274    };
275
276    if try_extract_moonbit_tree_sitter(file_path, &source, now, nodes, edges, refs) {
277        return;
278    }
279
280    add_regex_nodes(
281        file_path,
282        &source,
283        Language::MoonBit,
284        now,
285        nodes,
286        edges,
287        r"(?m)^\s*(pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\s*([^{]*)",
288        NodeKind::Function,
289    );
290    add_regex_nodes(
291        file_path,
292        &source,
293        Language::MoonBit,
294        now,
295        nodes,
296        edges,
297        r"(?m)^\s*(pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*::[A-Za-z_][A-Za-z0-9_]*)\s*([^{]*)",
298        NodeKind::Method,
299    );
300    add_regex_nodes(
301        file_path,
302        &source,
303        Language::MoonBit,
304        now,
305        nodes,
306        edges,
307        r"(?m)^\s*(pub\s+)?struct\s+([A-Za-z_][A-Za-z0-9_]*)",
308        NodeKind::Struct,
309    );
310    add_regex_nodes(
311        file_path,
312        &source,
313        Language::MoonBit,
314        now,
315        nodes,
316        edges,
317        r"(?m)^\s*(pub\s+)?trait\s+([A-Za-z_][A-Za-z0-9_]*)",
318        NodeKind::Trait,
319    );
320    add_regex_nodes(
321        file_path,
322        &source,
323        Language::MoonBit,
324        now,
325        nodes,
326        edges,
327        r"(?m)^\s*(pub\s+)?enum\s+([A-Za-z_][A-Za-z0-9_]*)",
328        NodeKind::Enum,
329    );
330    add_regex_nodes(
331        file_path,
332        &source,
333        Language::MoonBit,
334        now,
335        nodes,
336        edges,
337        r"(?m)^\s*(pub\s+)?type\s+([A-Za-z_][A-Za-z0-9_]*)",
338        NodeKind::TypeAlias,
339    );
340    add_regex_nodes(
341        file_path,
342        &source,
343        Language::MoonBit,
344        now,
345        nodes,
346        edges,
347        r"(?m)^\s*(pub\s+)?let\s+([A-Za-z_][A-Za-z0-9_]*)",
348        NodeKind::Variable,
349    );
350
351    let import_re =
352        Regex::new(r#"(?m)^\s*import\s+([@\w/.\-]+)(?:\s+as\s+([A-Za-z_][A-Za-z0-9_]*))?"#)
353            .unwrap();
354    for cap in import_re.captures_iter(&source) {
355        let package = cap.get(1).unwrap().as_str();
356        let name = cap.get(2).map(|m| m.as_str()).unwrap_or(package);
357        let node = make_node(
358            file_path,
359            Language::MoonBit,
360            NodeKind::Import,
361            name,
362            line_for(&source, cap.get(0).unwrap().start()),
363            0,
364            now,
365            Some(cap.get(0).unwrap().as_str().to_string()),
366        );
367        add_contains(nodes, edges, &node);
368        refs.push(unresolved(
369            &nodes[0].id,
370            name,
371            EdgeKind::Imports,
372            file_path,
373            Language::MoonBit,
374            node.start_line,
375        ));
376        nodes.push(node);
377    }
378    add_call_refs(
379        file_path,
380        &source,
381        Language::MoonBit,
382        nodes,
383        refs,
384        r"([@A-Za-z_][@A-Za-z0-9_:/]*)\s*\(",
385    );
386}
387
388fn extract_moonbit_metadata(
389    file_path: &str,
390    source: &str,
391    now: i64,
392    nodes: &mut Vec<Node>,
393    edges: &mut Vec<Edge>,
394    refs: &mut Vec<UnresolvedReference>,
395) {
396    let Ok(json) = serde_json::from_str::<serde_json::Value>(source) else {
397        return;
398    };
399    if file_path.ends_with("moon.mod.json") {
400        if let Some(name) = json.get("name").and_then(|v| v.as_str()) {
401            let node = make_node(
402                file_path,
403                Language::MoonBit,
404                NodeKind::Module,
405                name,
406                1,
407                0,
408                now,
409                Some("moon.mod.json".into()),
410            );
411            add_contains(nodes, edges, &node);
412            nodes.push(node);
413        }
414        return;
415    }
416
417    let package_name = json
418        .get("name")
419        .and_then(|v| v.as_str())
420        .or_else(|| file_path.rsplit('/').nth(1))
421        .unwrap_or("moonbit-package");
422    let node = make_node(
423        file_path,
424        Language::MoonBit,
425        NodeKind::Module,
426        package_name,
427        1,
428        0,
429        now,
430        Some(file_path.rsplit('/').next().unwrap_or("moon.pkg").into()),
431    );
432    add_contains(nodes, edges, &node);
433    let package_node_id = node.id.clone();
434    nodes.push(node);
435
436    if let Some(imports) = json.get("import").or_else(|| json.get("imports")) {
437        if let Some(obj) = imports.as_object() {
438            for (alias, value) in obj {
439                let target = value.as_str().unwrap_or(alias);
440                let import_node = make_node(
441                    file_path,
442                    Language::MoonBit,
443                    NodeKind::Import,
444                    alias,
445                    1,
446                    0,
447                    now,
448                    Some(target.to_string()),
449                );
450                add_contains(nodes, edges, &import_node);
451                refs.push(unresolved(
452                    &package_node_id,
453                    alias,
454                    EdgeKind::Imports,
455                    file_path,
456                    Language::MoonBit,
457                    1,
458                ));
459                nodes.push(import_node);
460            }
461        }
462    }
463}
464
465fn try_extract_rust_tree_sitter(
466    file_path: &str,
467    source: &str,
468    now: i64,
469    nodes: &mut Vec<Node>,
470    edges: &mut Vec<Edge>,
471    refs: &mut Vec<UnresolvedReference>,
472) -> bool {
473    let mut parser = Parser::new();
474    if parser
475        .set_language(&tree_sitter_rust::LANGUAGE.into())
476        .is_err()
477    {
478        return false;
479    }
480    let Some(tree) = parser.parse(source, None) else {
481        return false;
482    };
483    if tree.root_node().has_error() {
484        return false;
485    }
486
487    let root = tree.root_node();
488    let mut stack = Vec::new();
489    collect_rust_nodes(file_path, source, root, now, nodes, edges, refs, &mut stack);
490    collect_rust_refs(file_path, source, root, nodes, refs);
491    true
492}
493
494fn collect_rust_nodes(
495    file_path: &str,
496    source: &str,
497    node: SyntaxNode,
498    now: i64,
499    nodes: &mut Vec<Node>,
500    edges: &mut Vec<Edge>,
501    refs: &mut Vec<UnresolvedReference>,
502    stack: &mut Vec<String>,
503) {
504    let kind = match node.kind() {
505        "function_item" => {
506            if rust_receiver_type(node, source).is_some() {
507                Some(NodeKind::Method)
508            } else {
509                Some(NodeKind::Function)
510            }
511        }
512        "struct_item" => Some(NodeKind::Struct),
513        "trait_item" => Some(NodeKind::Trait),
514        "enum_item" => Some(NodeKind::Enum),
515        "enum_variant" => Some(NodeKind::EnumMember),
516        "type_item" => Some(NodeKind::TypeAlias),
517        "const_item" => Some(NodeKind::Constant),
518        "static_item" => Some(NodeKind::Variable),
519        "let_declaration" => Some(NodeKind::Variable),
520        "field_declaration" => Some(NodeKind::Field),
521        "function_signature_item" => Some(NodeKind::Method),
522        "use_declaration" => Some(NodeKind::Import),
523        "mod_item" => Some(NodeKind::Module),
524        _ => None,
525    };
526
527    let mut pushed = false;
528    if let Some(kind) = kind {
529        if let Some(name) = rust_node_name(node, source, kind) {
530            let signature = Some(
531                node_text(node, source)
532                    .lines()
533                    .next()
534                    .unwrap_or("")
535                    .trim()
536                    .to_string(),
537            );
538            let mut out =
539                make_node_span(file_path, Language::Rust, kind, &name, node, now, signature);
540            out.is_exported = rust_is_public(node, source);
541            out.visibility = if out.is_exported {
542                Some("public".into())
543            } else if matches!(
544                kind,
545                NodeKind::Function
546                    | NodeKind::Method
547                    | NodeKind::Struct
548                    | NodeKind::Trait
549                    | NodeKind::Enum
550                    | NodeKind::TypeAlias
551            ) {
552                Some("private".into())
553            } else {
554                None
555            };
556            out.is_async = node_text(node, source).trim_start().starts_with("async ")
557                || node_text(node, source).contains(" async fn ");
558            if kind == NodeKind::Method {
559                if let Some(owner) = rust_receiver_type(node, source) {
560                    out.qualified_name = format!("{owner}::{name}");
561                }
562            }
563            add_contains_from_stack(nodes, edges, stack, &out, "tree-sitter");
564            let id = out.id.clone();
565            nodes.push(out);
566            if matches!(
567                kind,
568                NodeKind::Struct
569                    | NodeKind::Trait
570                    | NodeKind::Enum
571                    | NodeKind::Module
572                    | NodeKind::Function
573                    | NodeKind::Method
574            ) {
575                stack.push(id);
576                pushed = true;
577            }
578        }
579    }
580
581    if node.kind() == "impl_item" {
582        if let Some((trait_name, type_name)) = rust_impl_trait_for_type(node, source) {
583            if let Some(type_node) = nodes.iter().find(|n| {
584                n.name == type_name
585                    && matches!(n.kind, NodeKind::Struct | NodeKind::Enum | NodeKind::Trait)
586            }) {
587                refs_push(
588                    refs,
589                    &type_node.id,
590                    &trait_name,
591                    EdgeKind::Implements,
592                    file_path,
593                    Language::Rust,
594                    node.start_position().row as i64 + 1,
595                    node.start_position().column as i64,
596                );
597            }
598        }
599    }
600
601    for child in named_children(node) {
602        collect_rust_nodes(file_path, source, child, now, nodes, edges, refs, stack);
603    }
604
605    if pushed {
606        stack.pop();
607    }
608}
609
610fn collect_rust_refs(
611    file_path: &str,
612    source: &str,
613    node: SyntaxNode,
614    nodes: &[Node],
615    refs: &mut Vec<UnresolvedReference>,
616) {
617    match node.kind() {
618        "use_declaration" => {
619            if let Some(name) = rust_import_root(node, source) {
620                refs_push(
621                    refs,
622                    &format!("file:{file_path}"),
623                    &name,
624                    EdgeKind::Imports,
625                    file_path,
626                    Language::Rust,
627                    node.start_position().row as i64 + 1,
628                    node.start_position().column as i64,
629                );
630            }
631        }
632        "call_expression" => {
633            if let Some(function) = node.child_by_field_name("function") {
634                if let Some(name) = callable_name(function, source) {
635                    if let Some(caller) =
636                        enclosing_callable(nodes, node.start_position().row as i64 + 1)
637                    {
638                        refs_push(
639                            refs,
640                            &caller.id,
641                            &name,
642                            EdgeKind::Calls,
643                            file_path,
644                            Language::Rust,
645                            node.start_position().row as i64 + 1,
646                            node.start_position().column as i64,
647                        );
648                    }
649                }
650            }
651        }
652        _ => {}
653    }
654
655    for child in named_children(node) {
656        collect_rust_refs(file_path, source, child, nodes, refs);
657    }
658}
659
660fn try_extract_moonbit_tree_sitter(
661    file_path: &str,
662    source: &str,
663    now: i64,
664    nodes: &mut Vec<Node>,
665    edges: &mut Vec<Edge>,
666    refs: &mut Vec<UnresolvedReference>,
667) -> bool {
668    let mut parser = Parser::new();
669    if parser
670        .set_language(&tree_sitter_moonbit::LANGUAGE.into())
671        .is_err()
672    {
673        return false;
674    }
675    let Some(tree) = parser.parse(source, None) else {
676        return false;
677    };
678    if tree.root_node().has_error() {
679        return false;
680    }
681
682    let root = tree.root_node();
683    let mut stack = Vec::new();
684    collect_moonbit_nodes(file_path, source, root, now, nodes, edges, &mut stack);
685    collect_moonbit_refs(file_path, source, root, nodes, refs);
686    true
687}
688
689fn collect_moonbit_nodes(
690    file_path: &str,
691    source: &str,
692    node: SyntaxNode,
693    now: i64,
694    nodes: &mut Vec<Node>,
695    edges: &mut Vec<Edge>,
696    stack: &mut Vec<String>,
697) {
698    let kind = match node.kind() {
699        "function_definition" => Some(NodeKind::Function),
700        "impl_definition" => Some(NodeKind::Method),
701        "struct_definition" | "tuple_struct_definition" => Some(NodeKind::Struct),
702        "trait_definition" => Some(NodeKind::Trait),
703        "trait_method_declaration" => Some(NodeKind::Method),
704        "enum_definition" => Some(NodeKind::Enum),
705        "enum_constructor" => Some(NodeKind::EnumMember),
706        "type_alias_definition" | "type_definition" => Some(NodeKind::TypeAlias),
707        "const_definition" => Some(NodeKind::Constant),
708        "import_declaration" => Some(NodeKind::Import),
709        "package_declaration" => Some(NodeKind::Module),
710        _ => None,
711    };
712
713    let mut pushed = false;
714    if let Some(kind) = kind {
715        if let Some(name) = moonbit_node_name(node, source, kind) {
716            let signature = Some(
717                node_text(node, source)
718                    .lines()
719                    .next()
720                    .unwrap_or("")
721                    .trim()
722                    .to_string(),
723            );
724            let mut out = make_node_span(
725                file_path,
726                Language::MoonBit,
727                kind,
728                &name,
729                node,
730                now,
731                signature,
732            );
733            out.is_exported = moonbit_is_public(node, source);
734            out.visibility = if out.is_exported {
735                Some("public".into())
736            } else {
737                None
738            };
739            if kind == NodeKind::Method {
740                if let Some(owner) = moonbit_impl_owner(node, source) {
741                    out.qualified_name = format!("{owner}::{name}");
742                }
743            }
744            add_contains_from_stack(nodes, edges, stack, &out, "tree-sitter");
745            let id = out.id.clone();
746            nodes.push(out);
747            if matches!(
748                kind,
749                NodeKind::Struct
750                    | NodeKind::Trait
751                    | NodeKind::Enum
752                    | NodeKind::Module
753                    | NodeKind::Function
754                    | NodeKind::Method
755            ) {
756                stack.push(id);
757                pushed = true;
758            }
759        }
760    }
761
762    for child in named_children(node) {
763        collect_moonbit_nodes(file_path, source, child, now, nodes, edges, stack);
764    }
765
766    if pushed {
767        stack.pop();
768    }
769}
770
771fn collect_moonbit_refs(
772    file_path: &str,
773    source: &str,
774    node: SyntaxNode,
775    nodes: &[Node],
776    refs: &mut Vec<UnresolvedReference>,
777) {
778    match node.kind() {
779        "import_declaration" => {
780            for child in named_children(node) {
781                if child.kind() == "import_item" {
782                    if let Some(name) = moonbit_import_name(child, source) {
783                        refs_push(
784                            refs,
785                            &format!("file:{file_path}"),
786                            &name,
787                            EdgeKind::Imports,
788                            file_path,
789                            Language::MoonBit,
790                            child.start_position().row as i64 + 1,
791                            child.start_position().column as i64,
792                        );
793                    }
794                }
795            }
796        }
797        "apply_expression" | "dot_apply_expression" | "dot_dot_apply_expression" => {
798            if let Some(name) = moonbit_call_name(node, source) {
799                if let Some(caller) =
800                    enclosing_callable(nodes, node.start_position().row as i64 + 1)
801                {
802                    refs_push(
803                        refs,
804                        &caller.id,
805                        &name,
806                        EdgeKind::Calls,
807                        file_path,
808                        Language::MoonBit,
809                        node.start_position().row as i64 + 1,
810                        node.start_position().column as i64,
811                    );
812                }
813            }
814        }
815        _ => {}
816    }
817
818    for child in named_children(node) {
819        collect_moonbit_refs(file_path, source, child, nodes, refs);
820    }
821}
822
823fn extract_generic(
824    file_path: &str,
825    source: &str,
826    language: Language,
827    now: i64,
828    nodes: &mut Vec<Node>,
829    edges: &mut Vec<Edge>,
830    refs: &mut Vec<UnresolvedReference>,
831) {
832    add_regex_nodes(
833        file_path,
834        source,
835        language,
836        now,
837        nodes,
838        edges,
839        r"(?m)^\s*(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_$][A-Za-z0-9_$]*)",
840        NodeKind::Function,
841    );
842    add_regex_nodes(
843        file_path,
844        source,
845        language,
846        now,
847        nodes,
848        edges,
849        r"(?m)^\s*(?:export\s+)?class\s+([A-Za-z_$][A-Za-z0-9_$]*)",
850        NodeKind::Class,
851    );
852    add_call_refs(
853        file_path,
854        source,
855        language,
856        nodes,
857        refs,
858        r"([A-Za-z_$][A-Za-z0-9_$.]*)\s*\(",
859    );
860}
861
862fn add_regex_nodes(
863    file_path: &str,
864    source: &str,
865    language: Language,
866    now: i64,
867    nodes: &mut Vec<Node>,
868    edges: &mut Vec<Edge>,
869    pattern: &str,
870    kind: NodeKind,
871) {
872    let re = Regex::new(pattern).unwrap();
873    for cap in re.captures_iter(source) {
874        let Some(name_match) = cap.get(2).or_else(|| cap.get(1)) else {
875            continue;
876        };
877        let mut name = name_match.as_str().to_string();
878        if kind == NodeKind::Method && name.contains("::") {
879            name = name.rsplit("::").next().unwrap_or(&name).to_string();
880        }
881        let signature = cap.get(0).map(|m| m.as_str().trim().to_string());
882        let line = line_for(source, name_match.start());
883        let mut node = make_node(file_path, language, kind, &name, line, 0, now, signature);
884        node.is_exported = cap
885            .get(1)
886            .map(|m| m.as_str().contains("pub") || m.as_str().contains("export"))
887            .unwrap_or(false);
888        node.visibility = if node.is_exported {
889            Some("public".into())
890        } else {
891            None
892        };
893        add_contains(nodes, edges, &node);
894        nodes.push(node);
895    }
896}
897
898fn add_call_refs(
899    file_path: &str,
900    source: &str,
901    language: Language,
902    nodes: &[Node],
903    refs: &mut Vec<UnresolvedReference>,
904    pattern: &str,
905) {
906    let re = Regex::new(pattern).unwrap();
907    let keywords = [
908        "if", "for", "while", "match", "return", "fn", "test", "inspect", "Some", "Ok", "Err",
909    ];
910    for cap in re.captures_iter(source) {
911        let name = cap.get(1).unwrap().as_str().rsplit("::").next().unwrap();
912        if keywords.contains(&name) {
913            continue;
914        }
915        let line = line_for(source, cap.get(1).unwrap().start());
916        if let Some(caller) = nodes
917            .iter()
918            .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
919            .rev()
920            .find(|n| n.start_line <= line)
921        {
922            refs.push(unresolved(
923                &caller.id,
924                name,
925                EdgeKind::Calls,
926                file_path,
927                language,
928                line,
929            ));
930        }
931    }
932}
933
934fn make_node(
935    file_path: &str,
936    language: Language,
937    kind: NodeKind,
938    name: &str,
939    line: i64,
940    col: i64,
941    now: i64,
942    signature: Option<String>,
943) -> Node {
944    Node {
945        id: format!("{}:{}:{}:{}", kind.as_str(), file_path, name, line),
946        kind,
947        name: name.to_string(),
948        qualified_name: name.to_string(),
949        file_path: file_path.to_string(),
950        language,
951        start_line: line,
952        end_line: line,
953        start_column: col,
954        end_column: col,
955        docstring: None,
956        signature,
957        visibility: None,
958        is_exported: false,
959        is_async: false,
960        is_static: false,
961        is_abstract: false,
962        updated_at: now,
963    }
964}
965
966fn make_node_span(
967    file_path: &str,
968    language: Language,
969    kind: NodeKind,
970    name: &str,
971    node: SyntaxNode,
972    now: i64,
973    signature: Option<String>,
974) -> Node {
975    let start = node.start_position();
976    let end = node.end_position();
977    Node {
978        id: format!("{}:{}:{}:{}", kind.as_str(), file_path, name, start.row + 1),
979        kind,
980        name: name.to_string(),
981        qualified_name: name.to_string(),
982        file_path: file_path.to_string(),
983        language,
984        start_line: start.row as i64 + 1,
985        end_line: end.row as i64 + 1,
986        start_column: start.column as i64,
987        end_column: end.column as i64,
988        docstring: None,
989        signature,
990        visibility: None,
991        is_exported: false,
992        is_async: false,
993        is_static: false,
994        is_abstract: false,
995        updated_at: now,
996    }
997}
998
999fn add_contains(nodes: &[Node], edges: &mut Vec<Edge>, node: &Node) {
1000    if let Some(file) = nodes.first() {
1001        edges.push(Edge {
1002            id: None,
1003            source: file.id.clone(),
1004            target: node.id.clone(),
1005            kind: EdgeKind::Contains,
1006            line: None,
1007            col: None,
1008            provenance: Some("regex".into()),
1009        });
1010    }
1011}
1012
1013fn add_contains_from_stack(
1014    nodes: &[Node],
1015    edges: &mut Vec<Edge>,
1016    stack: &[String],
1017    node: &Node,
1018    provenance: &str,
1019) {
1020    let source = stack
1021        .last()
1022        .cloned()
1023        .or_else(|| nodes.first().map(|n| n.id.clone()));
1024    if let Some(source) = source {
1025        edges.push(Edge {
1026            id: None,
1027            source,
1028            target: node.id.clone(),
1029            kind: EdgeKind::Contains,
1030            line: None,
1031            col: None,
1032            provenance: Some(provenance.into()),
1033        });
1034    }
1035}
1036
1037fn unresolved(
1038    from: &str,
1039    name: &str,
1040    kind: EdgeKind,
1041    file_path: &str,
1042    language: Language,
1043    line: i64,
1044) -> UnresolvedReference {
1045    UnresolvedReference {
1046        from_node_id: from.to_string(),
1047        reference_name: name.to_string(),
1048        reference_kind: kind,
1049        line,
1050        column: 0,
1051        file_path: file_path.to_string(),
1052        language,
1053    }
1054}
1055
1056fn refs_push(
1057    refs: &mut Vec<UnresolvedReference>,
1058    from: &str,
1059    name: &str,
1060    kind: EdgeKind,
1061    file_path: &str,
1062    language: Language,
1063    line: i64,
1064    column: i64,
1065) {
1066    if !name.is_empty() {
1067        refs.push(UnresolvedReference {
1068            from_node_id: from.to_string(),
1069            reference_name: name.to_string(),
1070            reference_kind: kind,
1071            line,
1072            column,
1073            file_path: file_path.to_string(),
1074            language,
1075        });
1076    }
1077}
1078
1079fn named_children(node: SyntaxNode) -> Vec<SyntaxNode> {
1080    (0..node.named_child_count())
1081        .filter_map(|i| node.named_child(i as u32))
1082        .collect()
1083}
1084
1085fn node_text<'a>(node: SyntaxNode, source: &'a str) -> &'a str {
1086    source.get(node.byte_range()).unwrap_or_default()
1087}
1088
1089fn child_text_by_kind<'a>(node: SyntaxNode, source: &'a str, kinds: &[&str]) -> Option<&'a str> {
1090    named_children(node)
1091        .into_iter()
1092        .find(|child| kinds.contains(&child.kind()))
1093        .map(|child| node_text(child, source))
1094}
1095
1096fn descendant_text_by_kind<'a>(
1097    node: SyntaxNode,
1098    source: &'a str,
1099    kinds: &[&str],
1100) -> Option<&'a str> {
1101    if kinds.contains(&node.kind()) {
1102        return Some(node_text(node, source));
1103    }
1104    for child in named_children(node) {
1105        if let Some(text) = descendant_text_by_kind(child, source, kinds) {
1106            return Some(text);
1107        }
1108    }
1109    None
1110}
1111
1112fn rust_node_name(node: SyntaxNode, source: &str, kind: NodeKind) -> Option<String> {
1113    if kind == NodeKind::Import {
1114        return rust_import_root(node, source);
1115    }
1116    if kind == NodeKind::Variable && node.kind() == "let_declaration" {
1117        return descendant_text_by_kind(node, source, &["identifier"]).map(clean_symbol_name);
1118    }
1119    if kind == NodeKind::Field {
1120        return child_text_by_kind(node, source, &["field_identifier", "identifier"])
1121            .map(clean_symbol_name);
1122    }
1123    node.child_by_field_name("name")
1124        .map(|n| clean_symbol_name(node_text(n, source)))
1125        .or_else(|| {
1126            child_text_by_kind(
1127                node,
1128                source,
1129                &["identifier", "type_identifier", "field_identifier"],
1130            )
1131            .map(clean_symbol_name)
1132        })
1133}
1134
1135fn rust_is_public(node: SyntaxNode, source: &str) -> bool {
1136    node_text(node, source).trim_start().starts_with("pub")
1137        || named_children(node).into_iter().any(|child| {
1138            child.kind() == "visibility_modifier" && node_text(child, source).contains("pub")
1139        })
1140}
1141
1142fn rust_receiver_type(node: SyntaxNode, source: &str) -> Option<String> {
1143    let mut parent = node.parent();
1144    while let Some(p) = parent {
1145        if p.kind() == "impl_item" {
1146            let mut direct = named_children(p)
1147                .into_iter()
1148                .filter(|child| {
1149                    matches!(
1150                        child.kind(),
1151                        "type_identifier" | "generic_type" | "scoped_type_identifier"
1152                    )
1153                })
1154                .collect::<Vec<_>>();
1155            if let Some(last) = direct.pop() {
1156                return Some(clean_type_name(node_text(last, source)));
1157            }
1158            return descendant_text_by_kind(p, source, &["type_identifier"]).map(clean_type_name);
1159        }
1160        parent = p.parent();
1161    }
1162    None
1163}
1164
1165fn rust_impl_trait_for_type(node: SyntaxNode, source: &str) -> Option<(String, String)> {
1166    if node.kind() != "impl_item" || !node_text(node, source).contains(" for ") {
1167        return None;
1168    }
1169    let names: Vec<String> = named_children(node)
1170        .into_iter()
1171        .filter(|child| {
1172            matches!(
1173                child.kind(),
1174                "type_identifier" | "generic_type" | "scoped_type_identifier"
1175            )
1176        })
1177        .map(|child| clean_type_name(node_text(child, source)))
1178        .collect();
1179    if names.len() >= 2 {
1180        Some((names[0].clone(), names[names.len() - 1].clone()))
1181    } else {
1182        None
1183    }
1184}
1185
1186fn rust_import_root(node: SyntaxNode, source: &str) -> Option<String> {
1187    let text = node_text(node, source)
1188        .trim()
1189        .strip_prefix("use")
1190        .unwrap_or(node_text(node, source))
1191        .trim()
1192        .trim_end_matches(';')
1193        .trim();
1194    text.split("::")
1195        .next()
1196        .map(|s| s.trim_matches('{').trim().to_string())
1197        .filter(|s| !s.is_empty())
1198}
1199
1200fn callable_name(node: SyntaxNode, source: &str) -> Option<String> {
1201    match node.kind() {
1202        "identifier" | "field_identifier" => Some(clean_symbol_name(node_text(node, source))),
1203        "scoped_identifier" => node_text(node, source)
1204            .rsplit("::")
1205            .next()
1206            .map(clean_symbol_name),
1207        "field_expression" => node
1208            .child_by_field_name("field")
1209            .map(|field| clean_symbol_name(node_text(field, source))),
1210        "generic_function" => named_children(node)
1211            .into_iter()
1212            .find_map(|child| callable_name(child, source)),
1213        _ => None,
1214    }
1215}
1216
1217fn moonbit_node_name(node: SyntaxNode, source: &str, kind: NodeKind) -> Option<String> {
1218    match kind {
1219        NodeKind::Function | NodeKind::Method => child_text_by_kind(
1220            node,
1221            source,
1222            &["function_identifier", "lowercase_identifier", "identifier"],
1223        )
1224        .map(|s| clean_symbol_name(s.rsplit("::").next().unwrap_or(s))),
1225        NodeKind::Struct | NodeKind::Trait | NodeKind::Enum => child_text_by_kind(
1226            node,
1227            source,
1228            &[
1229                "identifier",
1230                "type_identifier",
1231                "type_name",
1232                "uppercase_identifier",
1233            ],
1234        )
1235        .map(clean_symbol_name),
1236        NodeKind::EnumMember => child_text_by_kind(
1237            node,
1238            source,
1239            &["uppercase_identifier", "identifier", "type_name"],
1240        )
1241        .map(clean_symbol_name),
1242        NodeKind::TypeAlias => descendant_text_by_kind(
1243            node,
1244            source,
1245            &[
1246                "type_identifier",
1247                "type_name",
1248                "identifier",
1249                "uppercase_identifier",
1250            ],
1251        )
1252        .map(clean_symbol_name),
1253        NodeKind::Constant => {
1254            child_text_by_kind(node, source, &["uppercase_identifier", "identifier"])
1255                .map(clean_symbol_name)
1256        }
1257        NodeKind::Import => moonbit_import_name(node, source),
1258        NodeKind::Module => node
1259            .named_child(0)
1260            .map(|child| clean_quoted(node_text(child, source))),
1261        _ => None,
1262    }
1263}
1264
1265fn moonbit_is_public(node: SyntaxNode, source: &str) -> bool {
1266    named_children(node)
1267        .into_iter()
1268        .any(|child| child.kind() == "visibility" && node_text(child, source).contains("pub"))
1269        || node_text(node, source).trim_start().starts_with("pub ")
1270}
1271
1272fn moonbit_impl_owner(node: SyntaxNode, source: &str) -> Option<String> {
1273    child_text_by_kind(
1274        node,
1275        source,
1276        &["type_name", "type_identifier", "qualified_type_identifier"],
1277    )
1278    .map(clean_type_name)
1279}
1280
1281fn moonbit_import_name(node: SyntaxNode, source: &str) -> Option<String> {
1282    if node.kind() == "import_declaration" {
1283        return named_children(node)
1284            .into_iter()
1285            .find(|child| child.kind() == "import_item")
1286            .and_then(|child| moonbit_import_name(child, source));
1287    }
1288    named_children(node)
1289        .into_iter()
1290        .find(|child| child.kind() == "string_literal")
1291        .map(|child| clean_quoted(node_text(child, source)))
1292}
1293
1294fn moonbit_call_name(node: SyntaxNode, source: &str) -> Option<String> {
1295    for child in named_children(node) {
1296        match child.kind() {
1297            "qualified_identifier" | "function_identifier" | "method_expression" => {
1298                let text = node_text(child, source);
1299                let name = text
1300                    .rsplit(['.', ':'])
1301                    .find(|part| !part.is_empty())
1302                    .unwrap_or(text);
1303                return Some(clean_symbol_name(name));
1304            }
1305            "lowercase_identifier" | "identifier" => {
1306                return Some(clean_symbol_name(node_text(child, source)));
1307            }
1308            _ => {}
1309        }
1310    }
1311    None
1312}
1313
1314fn enclosing_callable(nodes: &[Node], line: i64) -> Option<&Node> {
1315    nodes
1316        .iter()
1317        .filter(|n| matches!(n.kind, NodeKind::Function | NodeKind::Method))
1318        .filter(|n| n.start_line <= line && line <= n.end_line.max(n.start_line))
1319        .min_by_key(|n| n.end_line - n.start_line)
1320}
1321
/// Normalises a raw symbol name.
///
/// Strips, in order: surrounding whitespace, surrounding double quotes,
/// surrounding single quotes, and finally any leading dots.
fn clean_symbol_name(s: &str) -> String {
    let unquoted = s.trim().trim_matches('"').trim_matches('\'');
    let without_leading_dots = unquoted.trim_start_matches('.');
    String::from(without_leading_dots)
}
1329
/// Removes surrounding whitespace, then surrounding double quotes, then
/// surrounding single quotes from `s`.
fn clean_quoted(s: &str) -> String {
    let trimmed = s.trim();
    let without_double = trimmed.trim_matches('"');
    let without_single = without_double.trim_matches('\'');
    String::from(without_single)
}
1333
/// Reduces a type expression to its bare name.
///
/// Everything from the first `<` onwards (generic arguments) is discarded, then
/// only the segment after the last `::` is kept, with whitespace trimmed.
fn clean_type_name(s: &str) -> String {
    let trimmed = s.trim();
    let head = match trimmed.find('<') {
        Some(generics_start) => &trimmed[..generics_start],
        None => trimmed,
    };
    let last_segment = match head.rfind("::") {
        Some(separator) => &head[separator + 2..],
        None => head,
    };
    last_segment.trim().to_owned()
}
1344
/// Returns the 1-based line number of byte offset `idx` within `source`.
///
/// The line number is one plus the count of `\n` bytes before `idx`. Offsets
/// past the end of `source` are clamped to its length.
///
/// The count is taken over the raw bytes rather than a `str` slice, so an offset
/// that lands inside a multi-byte UTF-8 character cannot panic. `\n` never occurs
/// inside a multi-byte sequence, so the result is unaffected.
fn line_for(source: &str, idx: usize) -> i64 {
    let end = idx.min(source.len());
    let newlines_before = source.as_bytes()[..end]
        .iter()
        .filter(|&&byte| byte == b'\n')
        .count();
    newlines_before as i64 + 1
}
1352
/// Blanks out everything in a Markdown document except the bodies of `mbt` code fences.
///
/// Every input line produces exactly one output line, so line numbers in the
/// result match the original document. Fence lines (those starting with three
/// backticks after optional indentation) and lines outside an `mbt` fence become
/// empty lines; lines inside an `mbt` fence are copied unchanged. A fence counts
/// as `mbt` when its line contains the text `mbt`; any other fence line ends the
/// current `mbt` region.
fn extract_mbt_markdown_code_with_padding(source: &str) -> String {
    let mut padded = String::with_capacity(source.len() + 1);
    let mut inside_mbt_fence = false;
    for raw_line in source.lines() {
        let unindented = raw_line.trim_start();
        if unindented.starts_with("```") {
            // Opening fences tagged `mbt` switch copying on; every other fence switches it off.
            inside_mbt_fence = unindented.contains("mbt");
        } else if inside_mbt_fence {
            padded.push_str(raw_line);
        }
        padded.push('\n');
    }
    padded
}
1370
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields `0` if the system clock reports a time before the epoch.
fn now_ms() -> i64 {
    let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
    match since_epoch {
        Ok(elapsed) => elapsed.as_millis() as i64,
        Err(_) => 0,
    }
}