Skip to main content

graphify_extract/
treesitter.rs

//! Tree-sitter based AST extraction engine.
//!
//! Provides accurate structural extraction using native tree-sitter grammars
//! for Python, JavaScript, TypeScript, Rust, Go, Java, C, C++, Ruby, C#, and Dart.
//! Falls back gracefully to the regex-based extractor for unsupported languages.
6
7use std::collections::{HashMap, HashSet};
8use std::path::Path;
9
10use graphify_core::confidence::Confidence;
11use graphify_core::id::make_id;
12use graphify_core::model::{ExtractionResult, GraphEdge, GraphNode, NodeType};
13use tracing::trace;
14use tree_sitter::{Language, Node, Parser};
15
16// ═══════════════════════════════════════════════════════════════════════════
17// Configuration per language
18// ═══════════════════════════════════════════════════════════════════════════
19
/// Describes which tree-sitter node kinds correspond to classes, functions,
/// imports and calls for a given language, together with the grammar field
/// names needed to read names and bodies from those nodes.
#[derive(Debug, Clone)]
pub struct TsConfig {
    /// Node kinds treated as class-like definitions (classes, structs, enums, ...).
    pub class_types: HashSet<&'static str>,
    /// Node kinds treated as function or method definitions.
    pub function_types: HashSet<&'static str>,
    /// Node kinds treated as import statements.
    pub import_types: HashSet<&'static str>,
    /// Node kinds treated as call expressions.
    pub call_types: HashSet<&'static str>,
    /// Field name used by the grammar to expose the identifier of a definition.
    pub name_field: &'static str,
    /// Optional override for class/struct name field (defaults to name_field).
    pub class_name_field: Option<&'static str>,
    /// Field name for the body block of a class/function.
    pub body_field: &'static str,
    /// Field name inside a call expression that points to the callee.
    pub call_function_field: &'static str,
}
36
37fn python_config() -> TsConfig {
38    TsConfig {
39        class_types: ["class_definition"].into_iter().collect(),
40        function_types: ["function_definition"].into_iter().collect(),
41        import_types: ["import_statement", "import_from_statement"]
42            .into_iter()
43            .collect(),
44        call_types: ["call"].into_iter().collect(),
45        name_field: "name",
46        class_name_field: None,
47        body_field: "body",
48        call_function_field: "function",
49    }
50}
51
52fn js_config() -> TsConfig {
53    TsConfig {
54        class_types: ["class_declaration", "class"].into_iter().collect(),
55        function_types: [
56            "function_declaration",
57            "method_definition",
58            "arrow_function",
59            "generator_function_declaration",
60            "generator_function",
61            "async_function_declaration",
62        ]
63        .into_iter()
64        .collect(),
65        import_types: ["import_statement"].into_iter().collect(),
66        call_types: ["call_expression"].into_iter().collect(),
67        name_field: "name",
68        class_name_field: None,
69        body_field: "body",
70        call_function_field: "function",
71    }
72}
73
74fn rust_config() -> TsConfig {
75    TsConfig {
76        class_types: ["struct_item", "enum_item", "trait_item", "impl_item"]
77            .into_iter()
78            .collect(),
79        function_types: ["function_item"].into_iter().collect(),
80        import_types: ["use_declaration"].into_iter().collect(),
81        call_types: ["call_expression"].into_iter().collect(),
82        name_field: "name",
83        class_name_field: None,
84        body_field: "body",
85        call_function_field: "function",
86    }
87}
88
89fn go_config() -> TsConfig {
90    TsConfig {
91        class_types: ["type_declaration"].into_iter().collect(),
92        function_types: ["function_declaration", "method_declaration"]
93            .into_iter()
94            .collect(),
95        import_types: ["import_declaration"].into_iter().collect(),
96        call_types: ["call_expression"].into_iter().collect(),
97        name_field: "name",
98        class_name_field: None,
99        body_field: "body",
100        call_function_field: "function",
101    }
102}
103
104fn java_config() -> TsConfig {
105    TsConfig {
106        class_types: [
107            "class_declaration",
108            "interface_declaration",
109            "enum_declaration",
110        ]
111        .into_iter()
112        .collect(),
113        function_types: ["method_declaration", "constructor_declaration"]
114            .into_iter()
115            .collect(),
116        import_types: ["import_declaration"].into_iter().collect(),
117        call_types: ["method_invocation"].into_iter().collect(),
118        name_field: "name",
119        class_name_field: None,
120        body_field: "body",
121        call_function_field: "name",
122    }
123}
124
125fn c_config() -> TsConfig {
126    TsConfig {
127        class_types: ["struct_specifier", "enum_specifier", "type_definition"]
128            .into_iter()
129            .collect(),
130        function_types: ["function_definition"].into_iter().collect(),
131        import_types: ["preproc_include"].into_iter().collect(),
132        call_types: ["call_expression"].into_iter().collect(),
133        name_field: "declarator",
134        class_name_field: Some("name"),
135        body_field: "body",
136        call_function_field: "function",
137    }
138}
139
140fn cpp_config() -> TsConfig {
141    TsConfig {
142        class_types: [
143            "class_specifier",
144            "struct_specifier",
145            "enum_specifier",
146            "namespace_definition",
147        ]
148        .into_iter()
149        .collect(),
150        function_types: ["function_definition"].into_iter().collect(),
151        import_types: ["preproc_include"].into_iter().collect(),
152        call_types: ["call_expression"].into_iter().collect(),
153        name_field: "declarator",
154        class_name_field: Some("name"),
155        body_field: "body",
156        call_function_field: "function",
157    }
158}
159
160fn ruby_config() -> TsConfig {
161    TsConfig {
162        class_types: ["class", "module"].into_iter().collect(),
163        function_types: ["method", "singleton_method"].into_iter().collect(),
164        import_types: ["call"].into_iter().collect(), // require/require_relative are method calls
165        call_types: ["call"].into_iter().collect(),
166        name_field: "name",
167        class_name_field: None,
168        body_field: "body",
169        call_function_field: "method",
170    }
171}
172
173fn csharp_config() -> TsConfig {
174    TsConfig {
175        class_types: [
176            "class_declaration",
177            "interface_declaration",
178            "struct_declaration",
179            "enum_declaration",
180        ]
181        .into_iter()
182        .collect(),
183        function_types: ["method_declaration", "constructor_declaration"]
184            .into_iter()
185            .collect(),
186        import_types: ["using_directive"].into_iter().collect(),
187        call_types: ["invocation_expression"].into_iter().collect(),
188        name_field: "name",
189        class_name_field: None,
190        body_field: "body",
191        call_function_field: "function",
192    }
193}
194
195fn dart_config() -> TsConfig {
196    TsConfig {
197        class_types: [
198            "class_definition",
199            "enum_declaration",
200            "mixin_declaration",
201            "extension_declaration",
202        ]
203        .into_iter()
204        .collect(),
205        function_types: [
206            "function_signature",
207            "method_signature",
208            "function_body",
209            "function_declaration",
210            "method_definition",
211        ]
212        .into_iter()
213        .collect(),
214        import_types: ["import_or_export", "part_directive", "part_of_directive"]
215            .into_iter()
216            .collect(),
217        call_types: ["method_invocation", "function_expression_invocation"]
218            .into_iter()
219            .collect(),
220        name_field: "name",
221        class_name_field: None,
222        body_field: "body",
223        call_function_field: "function",
224    }
225}
226
227// ═══════════════════════════════════════════════════════════════════════════
228// Public entry point
229// ═══════════════════════════════════════════════════════════════════════════
230
231/// Try tree-sitter extraction for a supported language.
232/// Returns `None` if the language is not supported by tree-sitter grammars.
233pub fn try_extract(path: &Path, source: &[u8], lang: &str) -> Option<ExtractionResult> {
234    let (language, config) = match lang {
235        "python" => (tree_sitter_python::LANGUAGE.into(), python_config()),
236        "javascript" => (tree_sitter_javascript::LANGUAGE.into(), js_config()),
237        "typescript" => (
238            tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
239            js_config(),
240        ),
241        "rust" => (tree_sitter_rust::LANGUAGE.into(), rust_config()),
242        "go" => (tree_sitter_go::LANGUAGE.into(), go_config()),
243        "java" => (tree_sitter_java::LANGUAGE.into(), java_config()),
244        "c" => (tree_sitter_c::LANGUAGE.into(), c_config()),
245        "cpp" => (tree_sitter_cpp::LANGUAGE.into(), cpp_config()),
246        "ruby" => (tree_sitter_ruby::LANGUAGE.into(), ruby_config()),
247        "csharp" => (tree_sitter_c_sharp::LANGUAGE.into(), csharp_config()),
248        "dart" => (tree_sitter_dart::LANGUAGE.into(), dart_config()),
249        _ => return None,
250    };
251    extract_with_treesitter(path, source, language, &config, lang)
252}
253
254// ═══════════════════════════════════════════════════════════════════════════
255// Core extraction
256// ═══════════════════════════════════════════════════════════════════════════
257
258/// Extract graph nodes and edges from a single file using tree-sitter.
259fn extract_with_treesitter(
260    path: &Path,
261    source: &[u8],
262    language: Language,
263    config: &TsConfig,
264    lang: &str,
265) -> Option<ExtractionResult> {
266    let mut parser = Parser::new();
267    parser.set_language(&language).ok()?;
268    let tree = parser.parse(source, None)?;
269    let root = tree.root_node();
270
271    let stem = path.file_stem()?.to_str()?;
272    let str_path = path.to_string_lossy();
273
274    let mut nodes = Vec::new();
275    let mut edges = Vec::new();
276    let mut seen_ids = HashSet::new();
277    // For the call-graph pass we record (caller_nid, body_start_byte, body_end_byte)
278    let mut function_bodies: Vec<(String, usize, usize)> = Vec::new();
279
280    // File node
281    let file_nid = make_id(&[&str_path]);
282    seen_ids.insert(file_nid.clone());
283    nodes.push(GraphNode {
284        id: file_nid.clone(),
285        label: stem.to_string(),
286        source_file: str_path.to_string(),
287        source_location: None,
288        node_type: NodeType::File,
289        community: None,
290        extra: HashMap::new(),
291    });
292
293    // Walk the AST
294    walk_node(
295        root,
296        source,
297        config,
298        lang,
299        &file_nid,
300        stem,
301        &str_path,
302        &mut nodes,
303        &mut edges,
304        &mut seen_ids,
305        &mut function_bodies,
306        None,
307    );
308
309    // ---- Call-graph pass ----
310    // Build label → nid mapping for known functions
311    let label_to_nid: HashMap<String, String> = nodes
312        .iter()
313        .filter(|n| matches!(n.node_type, NodeType::Function | NodeType::Method))
314        .map(|n| {
315            let normalized = n
316                .label
317                .trim_end_matches("()")
318                .trim_start_matches('.')
319                .to_lowercase();
320            (normalized, n.id.clone())
321        })
322        .collect();
323
324    let mut seen_calls: HashSet<(String, String)> = HashSet::new();
325    for (caller_nid, body_start, body_end) in &function_bodies {
326        let body_text = &source[*body_start..*body_end];
327        let body_str = String::from_utf8_lossy(body_text);
328        let body_lower = body_str.to_lowercase();
329        for (func_label, callee_nid) in &label_to_nid {
330            if callee_nid == caller_nid {
331                continue;
332            }
333            // Heuristic: look for `func_name(` in body, or for Ruby-style no-parens calls
334            let has_paren_call = body_lower.contains(&format!("{func_label}("));
335            let has_noparen_call = if lang == "ruby" {
336                // Ruby allows `func arg` or `func\n` — check if func_label appears
337                // as a standalone word (not part of a longer identifier)
338                body_lower.find(func_label.as_str()).is_some_and(|pos| {
339                    let after = pos + func_label.len();
340                    if after >= body_lower.len() {
341                        true // at end of body
342                    } else {
343                        let next_ch = body_lower.as_bytes()[after];
344                        // Must be followed by non-alphanumeric (space, newline, paren, etc.)
345                        !next_ch.is_ascii_alphanumeric() && next_ch != b'_'
346                    }
347                })
348            } else {
349                false
350            };
351            if has_paren_call || has_noparen_call {
352                let key = (caller_nid.clone(), callee_nid.clone());
353                if seen_calls.insert(key) {
354                    edges.push(GraphEdge {
355                        source: caller_nid.clone(),
356                        target: callee_nid.clone(),
357                        relation: "calls".to_string(),
358                        confidence: Confidence::Inferred,
359                        confidence_score: Confidence::Inferred.default_score(),
360                        source_file: str_path.to_string(),
361                        source_location: None,
362                        weight: 1.0,
363                        extra: HashMap::new(),
364                    });
365                }
366            }
367        }
368    }
369
370    trace!(
371        "treesitter({}): {} nodes, {} edges from {}",
372        lang,
373        nodes.len(),
374        edges.len(),
375        str_path
376    );
377
378    Some(ExtractionResult {
379        nodes,
380        edges,
381        hyperedges: vec![],
382    })
383}
384
385// ═══════════════════════════════════════════════════════════════════════════
386// AST walking
387// ═══════════════════════════════════════════════════════════════════════════
388
389#[allow(clippy::too_many_arguments)]
390fn walk_node(
391    node: Node,
392    source: &[u8],
393    config: &TsConfig,
394    lang: &str,
395    file_nid: &str,
396    stem: &str,
397    str_path: &str,
398    nodes: &mut Vec<GraphNode>,
399    edges: &mut Vec<GraphEdge>,
400    seen_ids: &mut HashSet<String>,
401    function_bodies: &mut Vec<(String, usize, usize)>,
402    parent_class_nid: Option<&str>,
403) {
404    let kind = node.kind();
405
406    // ---- Imports ----
407    if config.import_types.contains(kind) {
408        // For Ruby, `call` is in both import_types and call_types.
409        // Only treat require/require_relative as imports; let other calls recurse normally.
410        if lang == "ruby" && kind == "call" {
411            let method_name = node
412                .child_by_field_name("method")
413                .map(|n| node_text(n, source))
414                .unwrap_or_default();
415            if method_name == "require" || method_name == "require_relative" {
416                extract_import(node, source, file_nid, str_path, lang, edges, nodes);
417                return;
418            }
419            // Not a require call, fall through to normal processing
420        } else {
421            extract_import(node, source, file_nid, str_path, lang, edges, nodes);
422            return; // Don't recurse into import children
423        }
424    }
425
426    // ---- Classes / Structs / Enums / Traits ----
427    if config.class_types.contains(kind) {
428        handle_class_like(
429            node,
430            source,
431            config,
432            lang,
433            file_nid,
434            stem,
435            str_path,
436            nodes,
437            edges,
438            seen_ids,
439            function_bodies,
440        );
441        return;
442    }
443
444    // ---- Functions / Methods ----
445    if config.function_types.contains(kind) {
446        handle_function(
447            node,
448            source,
449            config,
450            lang,
451            file_nid,
452            stem,
453            str_path,
454            nodes,
455            edges,
456            seen_ids,
457            function_bodies,
458            parent_class_nid,
459        );
460        return;
461    }
462
463    // ---- Default: recurse into children ----
464    let mut cursor = node.walk();
465    for child in node.children(&mut cursor) {
466        walk_node(
467            child,
468            source,
469            config,
470            lang,
471            file_nid,
472            stem,
473            str_path,
474            nodes,
475            edges,
476            seen_ids,
477            function_bodies,
478            parent_class_nid,
479        );
480    }
481}
482
483// ═══════════════════════════════════════════════════════════════════════════
484// Class-like handler (class, struct, enum, trait, impl, type_declaration)
485// ═══════════════════════════════════════════════════════════════════════════
486
487#[allow(clippy::too_many_arguments)]
488fn handle_class_like(
489    node: Node,
490    source: &[u8],
491    config: &TsConfig,
492    lang: &str,
493    file_nid: &str,
494    stem: &str,
495    str_path: &str,
496    nodes: &mut Vec<GraphNode>,
497    edges: &mut Vec<GraphEdge>,
498    seen_ids: &mut HashSet<String>,
499    function_bodies: &mut Vec<(String, usize, usize)>,
500) {
501    let kind = node.kind();
502
503    // For Go type_declaration, we need to dig into the type_spec child
504    if lang == "go" && kind == "type_declaration" {
505        let mut cursor = node.walk();
506        for child in node.children(&mut cursor) {
507            if child.kind() == "type_spec" {
508                handle_go_type_spec(
509                    child,
510                    source,
511                    config,
512                    lang,
513                    file_nid,
514                    stem,
515                    str_path,
516                    nodes,
517                    edges,
518                    seen_ids,
519                    function_bodies,
520                );
521            }
522        }
523        return;
524    }
525
526    // Rust impl_item: extract methods inside, create "implements" edges
527    if lang == "rust" && kind == "impl_item" {
528        handle_rust_impl(
529            node,
530            source,
531            config,
532            lang,
533            file_nid,
534            stem,
535            str_path,
536            nodes,
537            edges,
538            seen_ids,
539            function_bodies,
540        );
541        return;
542    }
543
544    // Standard class/struct/enum/trait
545    let class_field = config.class_name_field.unwrap_or(config.name_field);
546    let name = match get_name(node, source, class_field) {
547        Some(n) => n,
548        None => return,
549    };
550    let line = node.start_position().row + 1;
551    let class_nid = make_id(&[str_path, &name]);
552
553    let node_type = classify_class_kind(kind, lang);
554
555    if seen_ids.insert(class_nid.clone()) {
556        nodes.push(GraphNode {
557            id: class_nid.clone(),
558            label: name.clone(),
559            source_file: str_path.to_string(),
560            source_location: Some(format!("L{line}")),
561            node_type,
562            community: None,
563            extra: HashMap::new(),
564        });
565        edges.push(make_edge(file_nid, &class_nid, "defines", str_path, line));
566    }
567
568    // Recurse into body to find methods
569    if let Some(body) = node.child_by_field_name(config.body_field) {
570        let mut cursor = body.walk();
571        for child in body.children(&mut cursor) {
572            walk_node(
573                child,
574                source,
575                config,
576                lang,
577                file_nid,
578                stem,
579                str_path,
580                nodes,
581                edges,
582                seen_ids,
583                function_bodies,
584                Some(&class_nid),
585            );
586        }
587    }
588}
589
590fn classify_class_kind(kind: &str, lang: &str) -> NodeType {
591    match kind {
592        // Rust
593        "struct_item" => NodeType::Struct,
594        "enum_item" => NodeType::Enum,
595        "trait_item" => NodeType::Trait,
596        // C / C++
597        "struct_specifier" => NodeType::Struct,
598        "enum_specifier" => NodeType::Enum,
599        "namespace_definition" => NodeType::Namespace,
600        // C#
601        "struct_declaration" => NodeType::Struct,
602        "enum_declaration" => match lang {
603            "csharp" | "java" | "dart" => NodeType::Enum,
604            _ => NodeType::Enum,
605        },
606        // Java / C#
607        "interface_declaration" => NodeType::Interface,
608        // Dart
609        "mixin_declaration" | "extension_declaration" => NodeType::Class,
610        // Ruby
611        "module" => NodeType::Module,
612        // C (type_definition is used for typedef'd structs/enums)
613        "type_definition" => NodeType::Struct,
614        // Default
615        _ => NodeType::Class,
616    }
617}
618
619#[allow(clippy::too_many_arguments)]
620fn handle_go_type_spec(
621    node: Node,
622    source: &[u8],
623    config: &TsConfig,
624    lang: &str,
625    file_nid: &str,
626    stem: &str,
627    str_path: &str,
628    nodes: &mut Vec<GraphNode>,
629    edges: &mut Vec<GraphEdge>,
630    seen_ids: &mut HashSet<String>,
631    function_bodies: &mut Vec<(String, usize, usize)>,
632) {
633    let name = match get_name(node, source, "name") {
634        Some(n) => n,
635        None => return,
636    };
637    let line = node.start_position().row + 1;
638    let nid = make_id(&[str_path, &name]);
639
640    // Determine struct vs interface by looking at the type child
641    let node_type = {
642        let mut nt = NodeType::Struct;
643        let mut cursor = node.walk();
644        for child in node.children(&mut cursor) {
645            match child.kind() {
646                "interface_type" => {
647                    nt = NodeType::Interface;
648                    break;
649                }
650                "struct_type" => {
651                    nt = NodeType::Struct;
652                    break;
653                }
654                _ => {}
655            }
656        }
657        nt
658    };
659
660    if seen_ids.insert(nid.clone()) {
661        nodes.push(GraphNode {
662            id: nid.clone(),
663            label: name.clone(),
664            source_file: str_path.to_string(),
665            source_location: Some(format!("L{line}")),
666            node_type,
667            community: None,
668            extra: HashMap::new(),
669        });
670        edges.push(make_edge(file_nid, &nid, "defines", str_path, line));
671    }
672
673    // Recurse into body for any child methods (Go doesn't nest methods in struct body,
674    // but interfaces have method specs)
675    if let Some(body) = node.child_by_field_name(config.body_field) {
676        let mut cursor = body.walk();
677        for child in body.children(&mut cursor) {
678            walk_node(
679                child,
680                source,
681                config,
682                lang,
683                file_nid,
684                stem,
685                str_path,
686                nodes,
687                edges,
688                seen_ids,
689                function_bodies,
690                Some(&nid),
691            );
692        }
693    }
694}
695
696#[allow(clippy::too_many_arguments)]
697fn handle_rust_impl(
698    node: Node,
699    source: &[u8],
700    config: &TsConfig,
701    lang: &str,
702    file_nid: &str,
703    stem: &str,
704    str_path: &str,
705    nodes: &mut Vec<GraphNode>,
706    edges: &mut Vec<GraphEdge>,
707    seen_ids: &mut HashSet<String>,
708    function_bodies: &mut Vec<(String, usize, usize)>,
709) {
710    // `impl [Trait for] Type { ... }`
711    // The type is the `type` field, the trait is the `trait` field
712    let type_name = node
713        .child_by_field_name("type")
714        .map(|n| node_text(n, source));
715    let trait_name = node
716        .child_by_field_name("trait")
717        .map(|n| node_text(n, source));
718
719    let impl_target_nid = type_name.as_ref().map(|tn| make_id(&[str_path, tn]));
720
721    // Create an "implements" edge if trait impl
722    if let (Some(trait_n), Some(target_nid)) = (&trait_name, &impl_target_nid) {
723        let line = node.start_position().row + 1;
724        let trait_nid = make_id(&[str_path, trait_n]);
725        edges.push(GraphEdge {
726            source: target_nid.clone(),
727            target: trait_nid,
728            relation: "implements".to_string(),
729            confidence: Confidence::Extracted,
730            confidence_score: Confidence::Extracted.default_score(),
731            source_file: str_path.to_string(),
732            source_location: Some(format!("L{line}")),
733            weight: 1.0,
734            extra: HashMap::new(),
735        });
736    }
737
738    // Recurse into body to find methods, treating them as methods of the impl target
739    if let Some(body) = node.child_by_field_name(config.body_field) {
740        let class_nid = impl_target_nid.as_deref();
741        let mut cursor = body.walk();
742        for child in body.children(&mut cursor) {
743            walk_node(
744                child,
745                source,
746                config,
747                lang,
748                file_nid,
749                stem,
750                str_path,
751                nodes,
752                edges,
753                seen_ids,
754                function_bodies,
755                class_nid,
756            );
757        }
758    }
759}
760
761// ═══════════════════════════════════════════════════════════════════════════
762// Function handler
763// ═══════════════════════════════════════════════════════════════════════════
764
/// Normalizes a Dart function name by dropping a leading accessor keyword.
///
/// For `lang == "dart"`, a single leading `"get "` or `"set "` is removed
/// (e.g. `"get value"` becomes `"value"`). Names from any other language, and
/// Dart names without such a prefix, are returned unchanged.
fn normalize_dart_function_name(lang: &str, func_name: &str) -> String {
    if lang != "dart" {
        return func_name.to_string();
    }

    func_name
        .strip_prefix("get ")
        .or_else(|| func_name.strip_prefix("set "))
        .unwrap_or(func_name)
        .to_string()
}
781
782#[allow(clippy::too_many_arguments)]
783fn handle_function(
784    node: Node,
785    source: &[u8],
786    config: &TsConfig,
787    _lang: &str,
788    file_nid: &str,
789    _stem: &str,
790    str_path: &str,
791    nodes: &mut Vec<GraphNode>,
792    edges: &mut Vec<GraphEdge>,
793    seen_ids: &mut HashSet<String>,
794    function_bodies: &mut Vec<(String, usize, usize)>,
795    parent_class_nid: Option<&str>,
796) {
797    // For JS arrow functions assigned to a variable, the name is on the parent
798    // `variable_declarator` node. But for function_declaration, method_definition,
799    // etc., the name is directly on the node.
800    let func_name = match get_name(node, source, config.name_field) {
801        Some(n) => n,
802        None => {
803            // For JS arrow functions, try to get name from parent variable_declarator
804            if node.kind() == "arrow_function" {
805                if let Some(parent) = node.parent() {
806                    if parent.kind() == "variable_declarator" {
807                        match get_name(parent, source, "name") {
808                            Some(n) => n,
809                            None => return,
810                        }
811                    } else {
812                        return;
813                    }
814                } else {
815                    return;
816                }
817            } else if _lang == "dart" {
818                // Dart function_signature/method_signature may not have a name field;
819                // try to find the first identifier child as the function name
820                let mut found = None;
821                let mut cursor = node.walk();
822                for child in node.children(&mut cursor) {
823                    if child.kind() == "identifier" {
824                        found = Some(node_text(child, source));
825                        break;
826                    }
827                }
828                match found {
829                    Some(n) if !n.is_empty() => n,
830                    _ => return,
831                }
832            } else {
833                return;
834            }
835        }
836    };
837
838    let normalized_name = normalize_dart_function_name(_lang, &func_name);
839    let line = node.start_position().row + 1;
840
841    let (func_nid, label, node_type, relation) = if let Some(class_nid) = parent_class_nid {
842        let nid = make_id(&[class_nid, &normalized_name]);
843        (
844            nid,
845            format!(".{}()", normalized_name),
846            NodeType::Method,
847            "defines",
848        )
849    } else {
850        let nid = make_id(&[str_path, &normalized_name]);
851        (
852            nid,
853            format!("{}()", normalized_name),
854            NodeType::Function,
855            "defines",
856        )
857    };
858
859    if seen_ids.insert(func_nid.clone()) {
860        nodes.push(GraphNode {
861            id: func_nid.clone(),
862            label,
863            source_file: str_path.to_string(),
864            source_location: Some(format!("L{line}")),
865            node_type,
866            community: None,
867            extra: HashMap::new(),
868        });
869
870        let parent_nid = parent_class_nid.unwrap_or(file_nid);
871        edges.push(make_edge(parent_nid, &func_nid, relation, str_path, line));
872    }
873
874    // Record the function body bytes for call-graph inference
875    if let Some(body) = node.child_by_field_name(config.body_field) {
876        function_bodies.push((func_nid, body.start_byte(), body.end_byte()));
877    } else {
878        // Fallback: use the whole node as body
879        function_bodies.push((func_nid, node.start_byte(), node.end_byte()));
880    }
881}
882
883// ═══════════════════════════════════════════════════════════════════════════
884// Import handler
885// ═══════════════════════════════════════════════════════════════════════════
886
887fn extract_import(
888    node: Node,
889    source: &[u8],
890    file_nid: &str,
891    str_path: &str,
892    lang: &str,
893    edges: &mut Vec<GraphEdge>,
894    nodes: &mut Vec<GraphNode>,
895) {
896    let line = node.start_position().row + 1;
897    let import_text = node_text(node, source);
898
899    match lang {
900        "python" => extract_python_import(node, source, file_nid, str_path, line, edges, nodes),
901        "javascript" | "typescript" => {
902            extract_js_import(node, source, file_nid, str_path, line, edges, nodes)
903        }
904        "rust" => {
905            // `use foo::bar::Baz;` → module = full text after "use"
906            let module = import_text
907                .strip_prefix("use ")
908                .unwrap_or(&import_text)
909                .trim_end_matches(';')
910                .trim();
911            add_import_node(
912                nodes,
913                edges,
914                file_nid,
915                str_path,
916                line,
917                module,
918                NodeType::Module,
919            );
920        }
921        "go" => {
922            extract_go_import(node, source, file_nid, str_path, line, edges, nodes);
923        }
924        "java" => {
925            // `import java.util.List;` or `import static java.util.Arrays.asList;`
926            let text = node_text(node, source);
927            let after_import = text.trim().strip_prefix("import ").unwrap_or(text.trim());
928            let module = after_import
929                .strip_prefix("static ")
930                .unwrap_or(after_import)
931                .trim_end_matches(';')
932                .trim();
933            add_import_node(
934                nodes,
935                edges,
936                file_nid,
937                str_path,
938                line,
939                module,
940                NodeType::Module,
941            );
942        }
943        "c" | "cpp" => {
944            // `#include <stdio.h>` or `#include "myheader.h"`
945            let text = node_text(node, source);
946            let module = text
947                .trim()
948                .strip_prefix("#include")
949                .unwrap_or(&text)
950                .trim()
951                .trim_matches(&['<', '>', '"'][..])
952                .trim();
953            add_import_node(
954                nodes,
955                edges,
956                file_nid,
957                str_path,
958                line,
959                module,
960                NodeType::Module,
961            );
962        }
963        "csharp" => {
964            // `using System.Collections.Generic;`
965            let text = node_text(node, source);
966            let module = text
967                .trim()
968                .strip_prefix("using ")
969                .unwrap_or(&text)
970                .trim_end_matches(';')
971                .trim();
972            add_import_node(
973                nodes,
974                edges,
975                file_nid,
976                str_path,
977                line,
978                module,
979                NodeType::Module,
980            );
981        }
982        "ruby" => {
983            extract_ruby_import(node, source, file_nid, str_path, line, edges, nodes);
984        }
985        "dart" => {
986            extract_dart_import(node, source, file_nid, str_path, line, edges, nodes);
987        }
988        _ => {
989            add_import_node(
990                nodes,
991                edges,
992                file_nid,
993                str_path,
994                line,
995                &import_text,
996                NodeType::Module,
997            );
998        }
999    }
1000}
1001
1002fn extract_python_import(
1003    node: Node,
1004    source: &[u8],
1005    file_nid: &str,
1006    str_path: &str,
1007    line: usize,
1008    edges: &mut Vec<GraphEdge>,
1009    nodes: &mut Vec<GraphNode>,
1010) {
1011    // `import_statement`: `import os` → child "dotted_name"
1012    // `import_from_statement`: `from pathlib import Path` → module_name + name children
1013    let kind = node.kind();
1014
1015    if kind == "import_from_statement" {
1016        let module = node
1017            .child_by_field_name("module_name")
1018            .map(|n| node_text(n, source))
1019            .unwrap_or_default();
1020        // Track how many edges existed before this statement
1021        let edges_before = edges.len();
1022        // Iterate over named import children
1023        let mut cursor = node.walk();
1024        for child in node.children(&mut cursor) {
1025            if child.kind() == "dotted_name" || child.kind() == "aliased_import" {
1026                let name_node = if child.kind() == "aliased_import" {
1027                    child.child_by_field_name("name")
1028                } else {
1029                    Some(child)
1030                };
1031                if let Some(nn) = name_node {
1032                    let name = node_text(nn, source);
1033                    if name != module {
1034                        let full = if module.is_empty() {
1035                            name
1036                        } else {
1037                            format!("{module}.{name}")
1038                        };
1039                        add_import_node(
1040                            nodes,
1041                            edges,
1042                            file_nid,
1043                            str_path,
1044                            line,
1045                            &full,
1046                            NodeType::Module,
1047                        );
1048                    }
1049                }
1050            }
1051        }
1052        // If no names were added by this statement (e.g. `from x import *`), add the module
1053        let new_edges = edges.len() - edges_before;
1054        if new_edges == 0 && !module.is_empty() {
1055            add_import_node(
1056                nodes,
1057                edges,
1058                file_nid,
1059                str_path,
1060                line,
1061                &module,
1062                NodeType::Module,
1063            );
1064        }
1065    } else {
1066        // `import os`, `import os.path`
1067        let mut cursor = node.walk();
1068        for child in node.children(&mut cursor) {
1069            if child.kind() == "dotted_name" || child.kind() == "aliased_import" {
1070                let name_node = if child.kind() == "aliased_import" {
1071                    child.child_by_field_name("name")
1072                } else {
1073                    Some(child)
1074                };
1075                if let Some(nn) = name_node {
1076                    let name = node_text(nn, source);
1077                    add_import_node(
1078                        nodes,
1079                        edges,
1080                        file_nid,
1081                        str_path,
1082                        line,
1083                        &name,
1084                        NodeType::Module,
1085                    );
1086                }
1087            }
1088        }
1089    }
1090}
1091
1092fn extract_js_import(
1093    node: Node,
1094    source: &[u8],
1095    file_nid: &str,
1096    str_path: &str,
1097    line: usize,
1098    edges: &mut Vec<GraphEdge>,
1099    nodes: &mut Vec<GraphNode>,
1100) {
1101    // JS import: `import { X, Y } from 'module'` or `import X from 'module'`
1102    // The source/module is in the `source` field
1103    let module = node
1104        .child_by_field_name("source")
1105        .map(|n| {
1106            let t = node_text(n, source);
1107            t.trim_matches(&['"', '\''][..]).to_string()
1108        })
1109        .unwrap_or_default();
1110
1111    // Collect imported identifiers
1112    let mut found_names = false;
1113    let mut cursor = node.walk();
1114    for child in node.children(&mut cursor) {
1115        if child.kind() == "import_clause" {
1116            let mut inner_cursor = child.walk();
1117            for inner in child.children(&mut inner_cursor) {
1118                match inner.kind() {
1119                    "identifier" => {
1120                        let name = node_text(inner, source);
1121                        let full = format!("{module}/{name}");
1122                        add_import_node(
1123                            nodes,
1124                            edges,
1125                            file_nid,
1126                            str_path,
1127                            line,
1128                            &full,
1129                            NodeType::Module,
1130                        );
1131                        found_names = true;
1132                    }
1133                    "named_imports" => {
1134                        let mut spec_cursor = inner.walk();
1135                        for spec in inner.children(&mut spec_cursor) {
1136                            if spec.kind() == "import_specifier" {
1137                                let name = spec
1138                                    .child_by_field_name("name")
1139                                    .map(|n| node_text(n, source))
1140                                    .unwrap_or_else(|| node_text(spec, source));
1141                                let full = format!("{module}/{name}");
1142                                add_import_node(
1143                                    nodes,
1144                                    edges,
1145                                    file_nid,
1146                                    str_path,
1147                                    line,
1148                                    &full,
1149                                    NodeType::Module,
1150                                );
1151                                found_names = true;
1152                            }
1153                        }
1154                    }
1155                    _ => {}
1156                }
1157            }
1158        }
1159    }
1160
1161    if !found_names && !module.is_empty() {
1162        add_import_node(
1163            nodes,
1164            edges,
1165            file_nid,
1166            str_path,
1167            line,
1168            &module,
1169            NodeType::Module,
1170        );
1171    }
1172}
1173
1174fn extract_go_import(
1175    node: Node,
1176    source: &[u8],
1177    file_nid: &str,
1178    str_path: &str,
1179    line: usize,
1180    edges: &mut Vec<GraphEdge>,
1181    nodes: &mut Vec<GraphNode>,
1182) {
1183    // Go imports: `import "fmt"` or `import ( "fmt" \n "os" )`
1184    let mut cursor = node.walk();
1185    for child in node.children(&mut cursor) {
1186        match child.kind() {
1187            "import_spec" => {
1188                if let Some(path_node) = child.child_by_field_name("path") {
1189                    let module = node_text(path_node, source).trim_matches('"').to_string();
1190                    let spec_line = child.start_position().row + 1;
1191                    add_import_node(
1192                        nodes,
1193                        edges,
1194                        file_nid,
1195                        str_path,
1196                        spec_line,
1197                        &module,
1198                        NodeType::Package,
1199                    );
1200                }
1201            }
1202            "import_spec_list" => {
1203                let mut inner = child.walk();
1204                for spec in child.children(&mut inner) {
1205                    if spec.kind() == "import_spec"
1206                        && let Some(path_node) = spec.child_by_field_name("path")
1207                    {
1208                        let module = node_text(path_node, source).trim_matches('"').to_string();
1209                        let spec_line = spec.start_position().row + 1;
1210                        add_import_node(
1211                            nodes,
1212                            edges,
1213                            file_nid,
1214                            str_path,
1215                            spec_line,
1216                            &module,
1217                            NodeType::Package,
1218                        );
1219                    }
1220                }
1221            }
1222            "interpreted_string_literal" => {
1223                // Single import: `import "fmt"`
1224                let module = node_text(child, source).trim_matches('"').to_string();
1225                add_import_node(
1226                    nodes,
1227                    edges,
1228                    file_nid,
1229                    str_path,
1230                    line,
1231                    &module,
1232                    NodeType::Package,
1233                );
1234            }
1235            _ => {}
1236        }
1237    }
1238}
1239
1240fn extract_ruby_import(
1241    node: Node,
1242    source: &[u8],
1243    file_nid: &str,
1244    str_path: &str,
1245    line: usize,
1246    edges: &mut Vec<GraphEdge>,
1247    nodes: &mut Vec<GraphNode>,
1248) {
1249    // Ruby imports are method calls: `require 'json'`, `require_relative 'helper'`
1250    // The tree-sitter node is a `call` with method=identifier("require"/"require_relative")
1251    // and arguments containing a string.
1252    let method_name = node
1253        .child_by_field_name("method")
1254        .map(|n| node_text(n, source))
1255        .unwrap_or_default();
1256
1257    if method_name != "require" && method_name != "require_relative" {
1258        return; // Not an import call, skip
1259    }
1260
1261    // Extract the argument string
1262    if let Some(args) = node.child_by_field_name("arguments") {
1263        let mut cursor = args.walk();
1264        for child in args.children(&mut cursor) {
1265            let kind = child.kind();
1266            if kind == "string" || kind == "string_literal" {
1267                let raw = node_text(child, source);
1268                let module = raw.trim_matches(&['"', '\''][..]).to_string();
1269                if !module.is_empty() {
1270                    add_import_node(
1271                        nodes,
1272                        edges,
1273                        file_nid,
1274                        str_path,
1275                        line,
1276                        &module,
1277                        NodeType::Module,
1278                    );
1279                }
1280                return;
1281            }
1282        }
1283    }
1284
1285    // Fallback: try parsing from the raw text
1286    let text = node_text(node, source);
1287    let module = text
1288        .trim()
1289        .strip_prefix("require_relative ")
1290        .or_else(|| text.trim().strip_prefix("require "))
1291        .unwrap_or(&text)
1292        .trim_matches(&['"', '\'', ' '][..]);
1293    if !module.is_empty() {
1294        add_import_node(
1295            nodes,
1296            edges,
1297            file_nid,
1298            str_path,
1299            line,
1300            module,
1301            NodeType::Module,
1302        );
1303    }
1304}
1305
1306fn extract_dart_import(
1307    node: Node,
1308    source: &[u8],
1309    file_nid: &str,
1310    str_path: &str,
1311    line: usize,
1312    edges: &mut Vec<GraphEdge>,
1313    nodes: &mut Vec<GraphNode>,
1314) {
1315    // Dart: `import 'dart:async';`, `part 'src/models.dart';`
1316    let text = node_text(node, source);
1317    let trimmed = text.trim().trim_end_matches(';').trim();
1318
1319    // Strip keyword prefix
1320    let module = trimmed
1321        .strip_prefix("part of ")
1322        .or_else(|| trimmed.strip_prefix("part "))
1323        .or_else(|| trimmed.strip_prefix("import "))
1324        .or_else(|| trimmed.strip_prefix("export "))
1325        .unwrap_or(trimmed)
1326        .trim()
1327        .trim_matches(&['"', '\''][..])
1328        // Remove `deferred as X`, `as X`, `show X`, `hide X` suffixes
1329        .split(" deferred ")
1330        .next()
1331        .unwrap_or("")
1332        .split(" as ")
1333        .next()
1334        .unwrap_or("")
1335        .split(" show ")
1336        .next()
1337        .unwrap_or("")
1338        .split(" hide ")
1339        .next()
1340        .unwrap_or("")
1341        .trim();
1342
1343    if !module.is_empty() {
1344        add_import_node(
1345            nodes,
1346            edges,
1347            file_nid,
1348            str_path,
1349            line,
1350            module,
1351            NodeType::Module,
1352        );
1353    }
1354}
1355
1356// ═══════════════════════════════════════════════════════════════════════════
1357// Helpers
1358// ═══════════════════════════════════════════════════════════════════════════
1359
1360/// Extract text from a tree-sitter node.
1361fn node_text(node: Node, source: &[u8]) -> String {
1362    node.utf8_text(source).unwrap_or("").to_string()
1363}
1364
1365/// Get the name of a definition node via its field name.
1366fn get_name(node: Node, source: &[u8], field: &str) -> Option<String> {
1367    let name_node = node.child_by_field_name(field)?;
1368    // For C/C++ declarators, unwrap nested declarators to find the identifier
1369    let text = unwrap_declarator_name(name_node, source);
1370    if text.is_empty() { None } else { Some(text) }
1371}
1372
1373/// Recursively unwrap C/C++ declarators (function_declarator, pointer_declarator, etc.)
1374/// to find the underlying identifier name.
1375fn unwrap_declarator_name(node: Node, source: &[u8]) -> String {
1376    match node.kind() {
1377        "function_declarator"
1378        | "pointer_declarator"
1379        | "reference_declarator"
1380        | "parenthesized_declarator" => {
1381            // The actual name is in the "declarator" field or first named child
1382            if let Some(inner) = node.child_by_field_name("declarator") {
1383                return unwrap_declarator_name(inner, source);
1384            }
1385            // Fallback: look for an identifier child
1386            let mut cursor = node.walk();
1387            for child in node.children(&mut cursor) {
1388                if child.kind() == "identifier" || child.kind() == "field_identifier" {
1389                    return node_text(child, source);
1390                }
1391            }
1392            node_text(node, source)
1393        }
1394        "qualified_identifier" | "scoped_identifier" => {
1395            // C++ qualified names like `Foo::bar` — use the "name" field
1396            if let Some(name) = node.child_by_field_name("name") {
1397                return node_text(name, source);
1398            }
1399            node_text(node, source)
1400        }
1401        _ => node_text(node, source),
1402    }
1403}
1404
1405fn add_import_node(
1406    nodes: &mut Vec<GraphNode>,
1407    edges: &mut Vec<GraphEdge>,
1408    file_nid: &str,
1409    str_path: &str,
1410    line: usize,
1411    module: &str,
1412    node_type: NodeType,
1413) {
1414    let import_id = make_id(&[str_path, "import", module]);
1415    nodes.push(GraphNode {
1416        id: import_id.clone(),
1417        label: module.to_string(),
1418        source_file: str_path.to_string(),
1419        source_location: Some(format!("L{line}")),
1420        node_type,
1421        community: None,
1422        extra: HashMap::new(),
1423    });
1424    edges.push(GraphEdge {
1425        source: file_nid.to_string(),
1426        target: import_id,
1427        relation: "imports".to_string(),
1428        confidence: Confidence::Extracted,
1429        confidence_score: Confidence::Extracted.default_score(),
1430        source_file: str_path.to_string(),
1431        source_location: Some(format!("L{line}")),
1432        weight: 1.0,
1433        extra: HashMap::new(),
1434    });
1435}
1436
1437fn make_edge(
1438    source_id: &str,
1439    target_id: &str,
1440    relation: &str,
1441    source_file: &str,
1442    line: usize,
1443) -> GraphEdge {
1444    GraphEdge {
1445        source: source_id.to_string(),
1446        target: target_id.to_string(),
1447        relation: relation.to_string(),
1448        confidence: Confidence::Extracted,
1449        confidence_score: Confidence::Extracted.default_score(),
1450        source_file: source_file.to_string(),
1451        source_location: Some(format!("L{line}")),
1452        weight: 1.0,
1453        extra: HashMap::new(),
1454    }
1455}
1456
1457// ═══════════════════════════════════════════════════════════════════════════
1458// Tests
1459// ═══════════════════════════════════════════════════════════════════════════
1460
1461// Tests moved to tests/treesitter.rs (integration tests)