Skip to main content

graphify_extract/
treesitter.rs

//! Tree-sitter based AST extraction engine.
//!
//! Provides accurate structural extraction using native tree-sitter grammars
//! for Python, JavaScript, TypeScript, Rust, Go, Java, C, C++, Ruby, C#, and Dart.
//! Falls back gracefully to the regex-based extractor for unsupported languages.
6
7use std::collections::{HashMap, HashSet};
8use std::path::Path;
9
10use graphify_core::confidence::Confidence;
11use graphify_core::id::make_id;
12use graphify_core::model::{ExtractionResult, GraphEdge, GraphNode, NodeType};
13use tracing::trace;
14use tree_sitter::{Language, Node, Parser};
15
16// ═══════════════════════════════════════════════════════════════════════════
17// Configuration per language
18// ═══════════════════════════════════════════════════════════════════════════
19
/// Describes which tree-sitter node kinds correspond to classes, functions,
/// imports and calls for a given language, plus the grammar field names used
/// to locate names, bodies and callees on those nodes.
#[derive(Debug, Clone)]
pub struct TsConfig {
    /// Node kinds treated as class-like definitions (class, struct, enum, ...).
    pub class_types: HashSet<&'static str>,
    /// Node kinds treated as function or method definitions.
    pub function_types: HashSet<&'static str>,
    /// Node kinds treated as import statements.
    pub import_types: HashSet<&'static str>,
    /// Node kinds treated as call expressions.
    pub call_types: HashSet<&'static str>,
    /// Field name used by the grammar to expose the identifier of a definition.
    pub name_field: &'static str,
    /// Optional override for class/struct name field (defaults to name_field).
    pub class_name_field: Option<&'static str>,
    /// Field name for the body block of a class/function.
    pub body_field: &'static str,
    /// Field name inside a call expression that points to the callee.
    pub call_function_field: &'static str,
}
36
37fn python_config() -> TsConfig {
38    TsConfig {
39        class_types: ["class_definition"].into_iter().collect(),
40        function_types: ["function_definition"].into_iter().collect(),
41        import_types: ["import_statement", "import_from_statement"]
42            .into_iter()
43            .collect(),
44        call_types: ["call"].into_iter().collect(),
45        name_field: "name",
46        class_name_field: None,
47        body_field: "body",
48        call_function_field: "function",
49    }
50}
51
52fn js_config() -> TsConfig {
53    TsConfig {
54        class_types: ["class_declaration", "class"].into_iter().collect(),
55        function_types: [
56            "function_declaration",
57            "method_definition",
58            "arrow_function",
59            "generator_function_declaration",
60        ]
61        .into_iter()
62        .collect(),
63        import_types: ["import_statement"].into_iter().collect(),
64        call_types: ["call_expression"].into_iter().collect(),
65        name_field: "name",
66        class_name_field: None,
67        body_field: "body",
68        call_function_field: "function",
69    }
70}
71
72fn rust_config() -> TsConfig {
73    TsConfig {
74        class_types: ["struct_item", "enum_item", "trait_item", "impl_item"]
75            .into_iter()
76            .collect(),
77        function_types: ["function_item"].into_iter().collect(),
78        import_types: ["use_declaration"].into_iter().collect(),
79        call_types: ["call_expression"].into_iter().collect(),
80        name_field: "name",
81        class_name_field: None,
82        body_field: "body",
83        call_function_field: "function",
84    }
85}
86
87fn go_config() -> TsConfig {
88    TsConfig {
89        class_types: ["type_declaration"].into_iter().collect(),
90        function_types: ["function_declaration", "method_declaration"]
91            .into_iter()
92            .collect(),
93        import_types: ["import_declaration"].into_iter().collect(),
94        call_types: ["call_expression"].into_iter().collect(),
95        name_field: "name",
96        class_name_field: None,
97        body_field: "body",
98        call_function_field: "function",
99    }
100}
101
102fn java_config() -> TsConfig {
103    TsConfig {
104        class_types: ["class_declaration", "interface_declaration"]
105            .into_iter()
106            .collect(),
107        function_types: ["method_declaration", "constructor_declaration"]
108            .into_iter()
109            .collect(),
110        import_types: ["import_declaration"].into_iter().collect(),
111        call_types: ["method_invocation"].into_iter().collect(),
112        name_field: "name",
113        class_name_field: None,
114        body_field: "body",
115        call_function_field: "name",
116    }
117}
118
119fn c_config() -> TsConfig {
120    TsConfig {
121        class_types: HashSet::new(),
122        function_types: ["function_definition"].into_iter().collect(),
123        import_types: ["preproc_include"].into_iter().collect(),
124        call_types: ["call_expression"].into_iter().collect(),
125        name_field: "declarator",
126        class_name_field: None,
127        body_field: "body",
128        call_function_field: "function",
129    }
130}
131
132fn cpp_config() -> TsConfig {
133    TsConfig {
134        class_types: ["class_specifier"].into_iter().collect(),
135        function_types: ["function_definition"].into_iter().collect(),
136        import_types: ["preproc_include"].into_iter().collect(),
137        call_types: ["call_expression"].into_iter().collect(),
138        name_field: "declarator",
139        class_name_field: Some("name"),
140        body_field: "body",
141        call_function_field: "function",
142    }
143}
144
145fn ruby_config() -> TsConfig {
146    TsConfig {
147        class_types: ["class"].into_iter().collect(),
148        function_types: ["method", "singleton_method"].into_iter().collect(),
149        import_types: HashSet::new(),
150        call_types: ["call"].into_iter().collect(),
151        name_field: "name",
152        class_name_field: None,
153        body_field: "body",
154        call_function_field: "method",
155    }
156}
157
158fn csharp_config() -> TsConfig {
159    TsConfig {
160        class_types: ["class_declaration", "interface_declaration"]
161            .into_iter()
162            .collect(),
163        function_types: ["method_declaration"].into_iter().collect(),
164        import_types: ["using_directive"].into_iter().collect(),
165        call_types: ["invocation_expression"].into_iter().collect(),
166        name_field: "name",
167        class_name_field: None,
168        body_field: "body",
169        call_function_field: "function",
170    }
171}
172
173fn dart_config() -> TsConfig {
174    TsConfig {
175        class_types: [
176            "class_definition",
177            "enum_declaration",
178            "mixin_declaration",
179            "extension_declaration",
180        ]
181        .into_iter()
182        .collect(),
183        function_types: ["function_signature", "method_signature", "function_body"]
184            .into_iter()
185            .collect(),
186        import_types: ["import_or_export"].into_iter().collect(),
187        call_types: ["method_invocation", "function_expression_invocation"]
188            .into_iter()
189            .collect(),
190        name_field: "name",
191        class_name_field: None,
192        body_field: "body",
193        call_function_field: "function",
194    }
195}
196
197// ═══════════════════════════════════════════════════════════════════════════
198// Public entry point
199// ═══════════════════════════════════════════════════════════════════════════
200
201/// Try tree-sitter extraction for a supported language.
202/// Returns `None` if the language is not supported by tree-sitter grammars.
203pub fn try_extract(path: &Path, source: &[u8], lang: &str) -> Option<ExtractionResult> {
204    let (language, config) = match lang {
205        "python" => (tree_sitter_python::LANGUAGE.into(), python_config()),
206        "javascript" => (tree_sitter_javascript::LANGUAGE.into(), js_config()),
207        "typescript" => (
208            tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
209            js_config(),
210        ),
211        "rust" => (tree_sitter_rust::LANGUAGE.into(), rust_config()),
212        "go" => (tree_sitter_go::LANGUAGE.into(), go_config()),
213        "java" => (tree_sitter_java::LANGUAGE.into(), java_config()),
214        "c" => (tree_sitter_c::LANGUAGE.into(), c_config()),
215        "cpp" => (tree_sitter_cpp::LANGUAGE.into(), cpp_config()),
216        "ruby" => (tree_sitter_ruby::LANGUAGE.into(), ruby_config()),
217        "csharp" => (tree_sitter_c_sharp::LANGUAGE.into(), csharp_config()),
218        "dart" => (tree_sitter_dart::LANGUAGE.into(), dart_config()),
219        _ => return None,
220    };
221    extract_with_treesitter(path, source, language, &config, lang)
222}
223
224// ═══════════════════════════════════════════════════════════════════════════
225// Core extraction
226// ═══════════════════════════════════════════════════════════════════════════
227
228/// Extract graph nodes and edges from a single file using tree-sitter.
229fn extract_with_treesitter(
230    path: &Path,
231    source: &[u8],
232    language: Language,
233    config: &TsConfig,
234    lang: &str,
235) -> Option<ExtractionResult> {
236    let mut parser = Parser::new();
237    parser.set_language(&language).ok()?;
238    let tree = parser.parse(source, None)?;
239    let root = tree.root_node();
240
241    let stem = path.file_stem()?.to_str()?;
242    let str_path = path.to_string_lossy();
243
244    let mut nodes = Vec::new();
245    let mut edges = Vec::new();
246    let mut seen_ids = HashSet::new();
247    // For the call-graph pass we record (caller_nid, body_start_byte, body_end_byte)
248    let mut function_bodies: Vec<(String, usize, usize)> = Vec::new();
249
250    // File node
251    let file_nid = make_id(&[&str_path]);
252    seen_ids.insert(file_nid.clone());
253    nodes.push(GraphNode {
254        id: file_nid.clone(),
255        label: stem.to_string(),
256        source_file: str_path.to_string(),
257        source_location: None,
258        node_type: NodeType::File,
259        community: None,
260        extra: HashMap::new(),
261    });
262
263    // Walk the AST
264    walk_node(
265        root,
266        source,
267        config,
268        lang,
269        &file_nid,
270        stem,
271        &str_path,
272        &mut nodes,
273        &mut edges,
274        &mut seen_ids,
275        &mut function_bodies,
276        None,
277    );
278
279    // ---- Call-graph pass ----
280    // Build label → nid mapping for known functions
281    let label_to_nid: HashMap<String, String> = nodes
282        .iter()
283        .filter(|n| matches!(n.node_type, NodeType::Function | NodeType::Method))
284        .map(|n| {
285            let normalized = n
286                .label
287                .trim_end_matches("()")
288                .trim_start_matches('.')
289                .to_lowercase();
290            (normalized, n.id.clone())
291        })
292        .collect();
293
294    let mut seen_calls: HashSet<(String, String)> = HashSet::new();
295    for (caller_nid, body_start, body_end) in &function_bodies {
296        let body_text = &source[*body_start..*body_end];
297        let body_str = String::from_utf8_lossy(body_text);
298        for (func_label, callee_nid) in &label_to_nid {
299            if callee_nid == caller_nid {
300                continue;
301            }
302            // Simple heuristic: look for `func_name(` in body
303            if body_str.to_lowercase().contains(&format!("{func_label}(")) {
304                let key = (caller_nid.clone(), callee_nid.clone());
305                if seen_calls.insert(key) {
306                    edges.push(GraphEdge {
307                        source: caller_nid.clone(),
308                        target: callee_nid.clone(),
309                        relation: "calls".to_string(),
310                        confidence: Confidence::Inferred,
311                        confidence_score: Confidence::Inferred.default_score(),
312                        source_file: str_path.to_string(),
313                        source_location: None,
314                        weight: 1.0,
315                        extra: HashMap::new(),
316                    });
317                }
318            }
319        }
320    }
321
322    trace!(
323        "treesitter({}): {} nodes, {} edges from {}",
324        lang,
325        nodes.len(),
326        edges.len(),
327        str_path
328    );
329
330    Some(ExtractionResult {
331        nodes,
332        edges,
333        hyperedges: vec![],
334    })
335}
336
337// ═══════════════════════════════════════════════════════════════════════════
338// AST walking
339// ═══════════════════════════════════════════════════════════════════════════
340
341#[allow(clippy::too_many_arguments)]
342fn walk_node(
343    node: Node,
344    source: &[u8],
345    config: &TsConfig,
346    lang: &str,
347    file_nid: &str,
348    stem: &str,
349    str_path: &str,
350    nodes: &mut Vec<GraphNode>,
351    edges: &mut Vec<GraphEdge>,
352    seen_ids: &mut HashSet<String>,
353    function_bodies: &mut Vec<(String, usize, usize)>,
354    parent_class_nid: Option<&str>,
355) {
356    let kind = node.kind();
357
358    // ---- Imports ----
359    if config.import_types.contains(kind) {
360        extract_import(node, source, file_nid, str_path, lang, edges, nodes);
361        return; // Don't recurse into import children
362    }
363
364    // ---- Classes / Structs / Enums / Traits ----
365    if config.class_types.contains(kind) {
366        handle_class_like(
367            node,
368            source,
369            config,
370            lang,
371            file_nid,
372            stem,
373            str_path,
374            nodes,
375            edges,
376            seen_ids,
377            function_bodies,
378        );
379        return;
380    }
381
382    // ---- Functions / Methods ----
383    if config.function_types.contains(kind) {
384        handle_function(
385            node,
386            source,
387            config,
388            lang,
389            file_nid,
390            stem,
391            str_path,
392            nodes,
393            edges,
394            seen_ids,
395            function_bodies,
396            parent_class_nid,
397        );
398        return;
399    }
400
401    // ---- Default: recurse into children ----
402    let mut cursor = node.walk();
403    for child in node.children(&mut cursor) {
404        walk_node(
405            child,
406            source,
407            config,
408            lang,
409            file_nid,
410            stem,
411            str_path,
412            nodes,
413            edges,
414            seen_ids,
415            function_bodies,
416            parent_class_nid,
417        );
418    }
419}
420
421// ═══════════════════════════════════════════════════════════════════════════
422// Class-like handler (class, struct, enum, trait, impl, type_declaration)
423// ═══════════════════════════════════════════════════════════════════════════
424
425#[allow(clippy::too_many_arguments)]
426fn handle_class_like(
427    node: Node,
428    source: &[u8],
429    config: &TsConfig,
430    lang: &str,
431    file_nid: &str,
432    stem: &str,
433    str_path: &str,
434    nodes: &mut Vec<GraphNode>,
435    edges: &mut Vec<GraphEdge>,
436    seen_ids: &mut HashSet<String>,
437    function_bodies: &mut Vec<(String, usize, usize)>,
438) {
439    let kind = node.kind();
440
441    // For Go type_declaration, we need to dig into the type_spec child
442    if lang == "go" && kind == "type_declaration" {
443        let mut cursor = node.walk();
444        for child in node.children(&mut cursor) {
445            if child.kind() == "type_spec" {
446                handle_go_type_spec(
447                    child,
448                    source,
449                    config,
450                    lang,
451                    file_nid,
452                    stem,
453                    str_path,
454                    nodes,
455                    edges,
456                    seen_ids,
457                    function_bodies,
458                );
459            }
460        }
461        return;
462    }
463
464    // Rust impl_item: extract methods inside, create "implements" edges
465    if lang == "rust" && kind == "impl_item" {
466        handle_rust_impl(
467            node,
468            source,
469            config,
470            lang,
471            file_nid,
472            stem,
473            str_path,
474            nodes,
475            edges,
476            seen_ids,
477            function_bodies,
478        );
479        return;
480    }
481
482    // Standard class/struct/enum/trait
483    let class_field = config.class_name_field.unwrap_or(config.name_field);
484    let name = match get_name(node, source, class_field) {
485        Some(n) => n,
486        None => return,
487    };
488    let line = node.start_position().row + 1;
489    let class_nid = make_id(&[str_path, &name]);
490
491    let node_type = classify_class_kind(kind, lang);
492
493    if seen_ids.insert(class_nid.clone()) {
494        nodes.push(GraphNode {
495            id: class_nid.clone(),
496            label: name.clone(),
497            source_file: str_path.to_string(),
498            source_location: Some(format!("L{line}")),
499            node_type,
500            community: None,
501            extra: HashMap::new(),
502        });
503        edges.push(make_edge(file_nid, &class_nid, "defines", str_path, line));
504    }
505
506    // Recurse into body to find methods
507    if let Some(body) = node.child_by_field_name(config.body_field) {
508        let mut cursor = body.walk();
509        for child in body.children(&mut cursor) {
510            walk_node(
511                child,
512                source,
513                config,
514                lang,
515                file_nid,
516                stem,
517                str_path,
518                nodes,
519                edges,
520                seen_ids,
521                function_bodies,
522                Some(&class_nid),
523            );
524        }
525    }
526}
527
528fn classify_class_kind(kind: &str, lang: &str) -> NodeType {
529    match (kind, lang) {
530        ("struct_item", "rust") => NodeType::Struct,
531        ("enum_item", "rust") => NodeType::Enum,
532        ("trait_item", "rust") => NodeType::Trait,
533        _ => NodeType::Class,
534    }
535}
536
537#[allow(clippy::too_many_arguments)]
538fn handle_go_type_spec(
539    node: Node,
540    source: &[u8],
541    config: &TsConfig,
542    lang: &str,
543    file_nid: &str,
544    stem: &str,
545    str_path: &str,
546    nodes: &mut Vec<GraphNode>,
547    edges: &mut Vec<GraphEdge>,
548    seen_ids: &mut HashSet<String>,
549    function_bodies: &mut Vec<(String, usize, usize)>,
550) {
551    let name = match get_name(node, source, "name") {
552        Some(n) => n,
553        None => return,
554    };
555    let line = node.start_position().row + 1;
556    let nid = make_id(&[str_path, &name]);
557
558    // Determine struct vs interface by looking at the type child
559    let node_type = {
560        let mut nt = NodeType::Struct;
561        let mut cursor = node.walk();
562        for child in node.children(&mut cursor) {
563            match child.kind() {
564                "interface_type" => {
565                    nt = NodeType::Interface;
566                    break;
567                }
568                "struct_type" => {
569                    nt = NodeType::Struct;
570                    break;
571                }
572                _ => {}
573            }
574        }
575        nt
576    };
577
578    if seen_ids.insert(nid.clone()) {
579        nodes.push(GraphNode {
580            id: nid.clone(),
581            label: name.clone(),
582            source_file: str_path.to_string(),
583            source_location: Some(format!("L{line}")),
584            node_type,
585            community: None,
586            extra: HashMap::new(),
587        });
588        edges.push(make_edge(file_nid, &nid, "defines", str_path, line));
589    }
590
591    // Recurse into body for any child methods (Go doesn't nest methods in struct body,
592    // but interfaces have method specs)
593    if let Some(body) = node.child_by_field_name(config.body_field) {
594        let mut cursor = body.walk();
595        for child in body.children(&mut cursor) {
596            walk_node(
597                child,
598                source,
599                config,
600                lang,
601                file_nid,
602                stem,
603                str_path,
604                nodes,
605                edges,
606                seen_ids,
607                function_bodies,
608                Some(&nid),
609            );
610        }
611    }
612}
613
614#[allow(clippy::too_many_arguments)]
615fn handle_rust_impl(
616    node: Node,
617    source: &[u8],
618    config: &TsConfig,
619    lang: &str,
620    file_nid: &str,
621    stem: &str,
622    str_path: &str,
623    nodes: &mut Vec<GraphNode>,
624    edges: &mut Vec<GraphEdge>,
625    seen_ids: &mut HashSet<String>,
626    function_bodies: &mut Vec<(String, usize, usize)>,
627) {
628    // `impl [Trait for] Type { ... }`
629    // The type is the `type` field, the trait is the `trait` field
630    let type_name = node
631        .child_by_field_name("type")
632        .map(|n| node_text(n, source));
633    let trait_name = node
634        .child_by_field_name("trait")
635        .map(|n| node_text(n, source));
636
637    let impl_target_nid = type_name.as_ref().map(|tn| make_id(&[str_path, tn]));
638
639    // Create an "implements" edge if trait impl
640    if let (Some(trait_n), Some(target_nid)) = (&trait_name, &impl_target_nid) {
641        let line = node.start_position().row + 1;
642        let trait_nid = make_id(&[str_path, trait_n]);
643        edges.push(GraphEdge {
644            source: target_nid.clone(),
645            target: trait_nid,
646            relation: "implements".to_string(),
647            confidence: Confidence::Extracted,
648            confidence_score: Confidence::Extracted.default_score(),
649            source_file: str_path.to_string(),
650            source_location: Some(format!("L{line}")),
651            weight: 1.0,
652            extra: HashMap::new(),
653        });
654    }
655
656    // Recurse into body to find methods, treating them as methods of the impl target
657    if let Some(body) = node.child_by_field_name(config.body_field) {
658        let class_nid = impl_target_nid.as_deref();
659        let mut cursor = body.walk();
660        for child in body.children(&mut cursor) {
661            walk_node(
662                child,
663                source,
664                config,
665                lang,
666                file_nid,
667                stem,
668                str_path,
669                nodes,
670                edges,
671                seen_ids,
672                function_bodies,
673                class_nid,
674            );
675        }
676    }
677}
678
679// ═══════════════════════════════════════════════════════════════════════════
680// Function handler
681// ═══════════════════════════════════════════════════════════════════════════
682
683#[allow(clippy::too_many_arguments)]
684fn handle_function(
685    node: Node,
686    source: &[u8],
687    config: &TsConfig,
688    _lang: &str,
689    file_nid: &str,
690    _stem: &str,
691    str_path: &str,
692    nodes: &mut Vec<GraphNode>,
693    edges: &mut Vec<GraphEdge>,
694    seen_ids: &mut HashSet<String>,
695    function_bodies: &mut Vec<(String, usize, usize)>,
696    parent_class_nid: Option<&str>,
697) {
698    // For JS arrow functions assigned to a variable, the name is on the parent
699    // `variable_declarator` node. But for function_declaration, method_definition,
700    // etc., the name is directly on the node.
701    let func_name = match get_name(node, source, config.name_field) {
702        Some(n) => n,
703        None => {
704            // For JS arrow functions, try to get name from parent variable_declarator
705            if node.kind() == "arrow_function" {
706                if let Some(parent) = node.parent() {
707                    if parent.kind() == "variable_declarator" {
708                        match get_name(parent, source, "name") {
709                            Some(n) => n,
710                            None => return,
711                        }
712                    } else {
713                        return;
714                    }
715                } else {
716                    return;
717                }
718            } else {
719                return;
720            }
721        }
722    };
723
724    let line = node.start_position().row + 1;
725
726    let (func_nid, label, node_type, relation) = if let Some(class_nid) = parent_class_nid {
727        let nid = make_id(&[class_nid, &func_name]);
728        (
729            nid,
730            format!(".{}()", func_name),
731            NodeType::Method,
732            "defines",
733        )
734    } else {
735        let nid = make_id(&[str_path, &func_name]);
736        (
737            nid,
738            format!("{}()", func_name),
739            NodeType::Function,
740            "defines",
741        )
742    };
743
744    if seen_ids.insert(func_nid.clone()) {
745        nodes.push(GraphNode {
746            id: func_nid.clone(),
747            label,
748            source_file: str_path.to_string(),
749            source_location: Some(format!("L{line}")),
750            node_type,
751            community: None,
752            extra: HashMap::new(),
753        });
754
755        let parent_nid = parent_class_nid.unwrap_or(file_nid);
756        edges.push(make_edge(parent_nid, &func_nid, relation, str_path, line));
757    }
758
759    // Record the function body bytes for call-graph inference
760    if let Some(body) = node.child_by_field_name(config.body_field) {
761        function_bodies.push((func_nid, body.start_byte(), body.end_byte()));
762    } else {
763        // Fallback: use the whole node as body
764        function_bodies.push((func_nid, node.start_byte(), node.end_byte()));
765    }
766}
767
768// ═══════════════════════════════════════════════════════════════════════════
769// Import handler
770// ═══════════════════════════════════════════════════════════════════════════
771
772fn extract_import(
773    node: Node,
774    source: &[u8],
775    file_nid: &str,
776    str_path: &str,
777    lang: &str,
778    edges: &mut Vec<GraphEdge>,
779    nodes: &mut Vec<GraphNode>,
780) {
781    let line = node.start_position().row + 1;
782    let import_text = node_text(node, source);
783
784    match lang {
785        "python" => extract_python_import(node, source, file_nid, str_path, line, edges, nodes),
786        "javascript" | "typescript" => {
787            extract_js_import(node, source, file_nid, str_path, line, edges, nodes)
788        }
789        "rust" => {
790            // `use foo::bar::Baz;` → module = full text after "use"
791            let module = import_text
792                .strip_prefix("use ")
793                .unwrap_or(&import_text)
794                .trim_end_matches(';')
795                .trim();
796            add_import_node(
797                nodes,
798                edges,
799                file_nid,
800                str_path,
801                line,
802                module,
803                NodeType::Module,
804            );
805        }
806        "go" => {
807            extract_go_import(node, source, file_nid, str_path, line, edges, nodes);
808        }
809        "java" => {
810            // `import java.util.List;` → extract path after "import"
811            let text = node_text(node, source);
812            let module = text
813                .trim()
814                .strip_prefix("import ")
815                .unwrap_or(&text)
816                .strip_prefix("static ")
817                .unwrap_or_else(|| text.trim().strip_prefix("import ").unwrap_or(&text))
818                .trim_end_matches(';')
819                .trim();
820            add_import_node(
821                nodes,
822                edges,
823                file_nid,
824                str_path,
825                line,
826                module,
827                NodeType::Module,
828            );
829        }
830        "c" | "cpp" => {
831            // `#include <stdio.h>` or `#include "myheader.h"`
832            let text = node_text(node, source);
833            let module = text
834                .trim()
835                .strip_prefix("#include")
836                .unwrap_or(&text)
837                .trim()
838                .trim_matches(&['<', '>', '"'][..])
839                .trim();
840            add_import_node(
841                nodes,
842                edges,
843                file_nid,
844                str_path,
845                line,
846                module,
847                NodeType::Module,
848            );
849        }
850        "csharp" => {
851            // `using System.Collections.Generic;`
852            let text = node_text(node, source);
853            let module = text
854                .trim()
855                .strip_prefix("using ")
856                .unwrap_or(&text)
857                .trim_end_matches(';')
858                .trim();
859            add_import_node(
860                nodes,
861                edges,
862                file_nid,
863                str_path,
864                line,
865                module,
866                NodeType::Module,
867            );
868        }
869        _ => {
870            add_import_node(
871                nodes,
872                edges,
873                file_nid,
874                str_path,
875                line,
876                &import_text,
877                NodeType::Module,
878            );
879        }
880    }
881}
882
883fn extract_python_import(
884    node: Node,
885    source: &[u8],
886    file_nid: &str,
887    str_path: &str,
888    line: usize,
889    edges: &mut Vec<GraphEdge>,
890    nodes: &mut Vec<GraphNode>,
891) {
892    // `import_statement`: `import os` → child "dotted_name"
893    // `import_from_statement`: `from pathlib import Path` → module_name + name children
894    let kind = node.kind();
895
896    if kind == "import_from_statement" {
897        let module = node
898            .child_by_field_name("module_name")
899            .map(|n| node_text(n, source))
900            .unwrap_or_default();
901        // Iterate over named import children
902        let mut cursor = node.walk();
903        for child in node.children(&mut cursor) {
904            if child.kind() == "dotted_name" || child.kind() == "aliased_import" {
905                let name_node = if child.kind() == "aliased_import" {
906                    child.child_by_field_name("name")
907                } else {
908                    Some(child)
909                };
910                if let Some(nn) = name_node {
911                    let name = node_text(nn, source);
912                    if name != module {
913                        let full = if module.is_empty() {
914                            name
915                        } else {
916                            format!("{module}.{name}")
917                        };
918                        add_import_node(
919                            nodes,
920                            edges,
921                            file_nid,
922                            str_path,
923                            line,
924                            &full,
925                            NodeType::Module,
926                        );
927                    }
928                }
929            }
930        }
931        // If no names were added (e.g. `from x import *`), add the module
932        let import_count = edges.iter().filter(|e| e.relation == "imports").count();
933        if import_count == 0 && !module.is_empty() {
934            add_import_node(
935                nodes,
936                edges,
937                file_nid,
938                str_path,
939                line,
940                &module,
941                NodeType::Module,
942            );
943        }
944    } else {
945        // `import os`, `import os.path`
946        let mut cursor = node.walk();
947        for child in node.children(&mut cursor) {
948            if child.kind() == "dotted_name" || child.kind() == "aliased_import" {
949                let name_node = if child.kind() == "aliased_import" {
950                    child.child_by_field_name("name")
951                } else {
952                    Some(child)
953                };
954                if let Some(nn) = name_node {
955                    let name = node_text(nn, source);
956                    add_import_node(
957                        nodes,
958                        edges,
959                        file_nid,
960                        str_path,
961                        line,
962                        &name,
963                        NodeType::Module,
964                    );
965                }
966            }
967        }
968    }
969}
970
971fn extract_js_import(
972    node: Node,
973    source: &[u8],
974    file_nid: &str,
975    str_path: &str,
976    line: usize,
977    edges: &mut Vec<GraphEdge>,
978    nodes: &mut Vec<GraphNode>,
979) {
980    // JS import: `import { X, Y } from 'module'` or `import X from 'module'`
981    // The source/module is in the `source` field
982    let module = node
983        .child_by_field_name("source")
984        .map(|n| {
985            let t = node_text(n, source);
986            t.trim_matches(&['"', '\''][..]).to_string()
987        })
988        .unwrap_or_default();
989
990    // Collect imported identifiers
991    let mut found_names = false;
992    let mut cursor = node.walk();
993    for child in node.children(&mut cursor) {
994        if child.kind() == "import_clause" {
995            let mut inner_cursor = child.walk();
996            for inner in child.children(&mut inner_cursor) {
997                match inner.kind() {
998                    "identifier" => {
999                        let name = node_text(inner, source);
1000                        let full = format!("{module}/{name}");
1001                        add_import_node(
1002                            nodes,
1003                            edges,
1004                            file_nid,
1005                            str_path,
1006                            line,
1007                            &full,
1008                            NodeType::Module,
1009                        );
1010                        found_names = true;
1011                    }
1012                    "named_imports" => {
1013                        let mut spec_cursor = inner.walk();
1014                        for spec in inner.children(&mut spec_cursor) {
1015                            if spec.kind() == "import_specifier" {
1016                                let name = spec
1017                                    .child_by_field_name("name")
1018                                    .map(|n| node_text(n, source))
1019                                    .unwrap_or_else(|| node_text(spec, source));
1020                                let full = format!("{module}/{name}");
1021                                add_import_node(
1022                                    nodes,
1023                                    edges,
1024                                    file_nid,
1025                                    str_path,
1026                                    line,
1027                                    &full,
1028                                    NodeType::Module,
1029                                );
1030                                found_names = true;
1031                            }
1032                        }
1033                    }
1034                    _ => {}
1035                }
1036            }
1037        }
1038    }
1039
1040    if !found_names && !module.is_empty() {
1041        add_import_node(
1042            nodes,
1043            edges,
1044            file_nid,
1045            str_path,
1046            line,
1047            &module,
1048            NodeType::Module,
1049        );
1050    }
1051}
1052
1053fn extract_go_import(
1054    node: Node,
1055    source: &[u8],
1056    file_nid: &str,
1057    str_path: &str,
1058    line: usize,
1059    edges: &mut Vec<GraphEdge>,
1060    nodes: &mut Vec<GraphNode>,
1061) {
1062    // Go imports: `import "fmt"` or `import ( "fmt" \n "os" )`
1063    let mut cursor = node.walk();
1064    for child in node.children(&mut cursor) {
1065        match child.kind() {
1066            "import_spec" => {
1067                if let Some(path_node) = child.child_by_field_name("path") {
1068                    let module = node_text(path_node, source).trim_matches('"').to_string();
1069                    let spec_line = child.start_position().row + 1;
1070                    add_import_node(
1071                        nodes,
1072                        edges,
1073                        file_nid,
1074                        str_path,
1075                        spec_line,
1076                        &module,
1077                        NodeType::Package,
1078                    );
1079                }
1080            }
1081            "import_spec_list" => {
1082                let mut inner = child.walk();
1083                for spec in child.children(&mut inner) {
1084                    if spec.kind() == "import_spec"
1085                        && let Some(path_node) = spec.child_by_field_name("path")
1086                    {
1087                        let module = node_text(path_node, source).trim_matches('"').to_string();
1088                        let spec_line = spec.start_position().row + 1;
1089                        add_import_node(
1090                            nodes,
1091                            edges,
1092                            file_nid,
1093                            str_path,
1094                            spec_line,
1095                            &module,
1096                            NodeType::Package,
1097                        );
1098                    }
1099                }
1100            }
1101            "interpreted_string_literal" => {
1102                // Single import: `import "fmt"`
1103                let module = node_text(child, source).trim_matches('"').to_string();
1104                add_import_node(
1105                    nodes,
1106                    edges,
1107                    file_nid,
1108                    str_path,
1109                    line,
1110                    &module,
1111                    NodeType::Package,
1112                );
1113            }
1114            _ => {}
1115        }
1116    }
1117}
1118
1119// ═══════════════════════════════════════════════════════════════════════════
1120// Helpers
1121// ═══════════════════════════════════════════════════════════════════════════
1122
1123/// Extract text from a tree-sitter node.
1124fn node_text(node: Node, source: &[u8]) -> String {
1125    node.utf8_text(source).unwrap_or("").to_string()
1126}
1127
1128/// Get the name of a definition node via its field name.
1129fn get_name(node: Node, source: &[u8], field: &str) -> Option<String> {
1130    let name_node = node.child_by_field_name(field)?;
1131    // For C/C++ declarators, unwrap nested declarators to find the identifier
1132    let text = unwrap_declarator_name(name_node, source);
1133    if text.is_empty() { None } else { Some(text) }
1134}
1135
1136/// Recursively unwrap C/C++ declarators (function_declarator, pointer_declarator, etc.)
1137/// to find the underlying identifier name.
1138fn unwrap_declarator_name(node: Node, source: &[u8]) -> String {
1139    match node.kind() {
1140        "function_declarator"
1141        | "pointer_declarator"
1142        | "reference_declarator"
1143        | "parenthesized_declarator" => {
1144            // The actual name is in the "declarator" field or first named child
1145            if let Some(inner) = node.child_by_field_name("declarator") {
1146                return unwrap_declarator_name(inner, source);
1147            }
1148            // Fallback: look for an identifier child
1149            let mut cursor = node.walk();
1150            for child in node.children(&mut cursor) {
1151                if child.kind() == "identifier" || child.kind() == "field_identifier" {
1152                    return node_text(child, source);
1153                }
1154            }
1155            node_text(node, source)
1156        }
1157        "qualified_identifier" | "scoped_identifier" => {
1158            // C++ qualified names like `Foo::bar` — use the "name" field
1159            if let Some(name) = node.child_by_field_name("name") {
1160                return node_text(name, source);
1161            }
1162            node_text(node, source)
1163        }
1164        _ => node_text(node, source),
1165    }
1166}
1167
1168fn add_import_node(
1169    nodes: &mut Vec<GraphNode>,
1170    edges: &mut Vec<GraphEdge>,
1171    file_nid: &str,
1172    str_path: &str,
1173    line: usize,
1174    module: &str,
1175    node_type: NodeType,
1176) {
1177    let import_id = make_id(&[str_path, "import", module]);
1178    nodes.push(GraphNode {
1179        id: import_id.clone(),
1180        label: module.to_string(),
1181        source_file: str_path.to_string(),
1182        source_location: Some(format!("L{line}")),
1183        node_type,
1184        community: None,
1185        extra: HashMap::new(),
1186    });
1187    edges.push(GraphEdge {
1188        source: file_nid.to_string(),
1189        target: import_id,
1190        relation: "imports".to_string(),
1191        confidence: Confidence::Extracted,
1192        confidence_score: Confidence::Extracted.default_score(),
1193        source_file: str_path.to_string(),
1194        source_location: Some(format!("L{line}")),
1195        weight: 1.0,
1196        extra: HashMap::new(),
1197    });
1198}
1199
1200fn make_edge(
1201    source_id: &str,
1202    target_id: &str,
1203    relation: &str,
1204    source_file: &str,
1205    line: usize,
1206) -> GraphEdge {
1207    GraphEdge {
1208        source: source_id.to_string(),
1209        target: target_id.to_string(),
1210        relation: relation.to_string(),
1211        confidence: Confidence::Extracted,
1212        confidence_score: Confidence::Extracted.default_score(),
1213        source_file: source_file.to_string(),
1214        source_location: Some(format!("L{line}")),
1215        weight: 1.0,
1216        extra: HashMap::new(),
1217    }
1218}
1219
1220// ═══════════════════════════════════════════════════════════════════════════
1221// Tests
1222// ═══════════════════════════════════════════════════════════════════════════
1223
1224// Tests moved to tests/treesitter.rs (integration tests)