Skip to main content

cgx_engine/parsers/
ts.rs

1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{
4    CommentKind, CommentTag, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult,
5};
6use crate::walker::SourceFile;
7
/// Stateless tree-sitter based parser for TypeScript / JavaScript sources.
///
/// The type carries no data; all work happens in its [`LanguageParser`]
/// implementation. `Default` is derived, so `TypeScriptParser::default()` and
/// [`TypeScriptParser::new`] are interchangeable.
#[derive(Debug, Clone, Copy, Default)]
pub struct TypeScriptParser;

impl TypeScriptParser {
    /// Creates a new parser instance.
    pub fn new() -> Self {
        Self
    }
}
21
/// Returns `true` when `path` ends in `.tsx` or `.jsx` (case-sensitive).
fn is_jsx_extension(path: &str) -> bool {
    // The text after the last dot is the extension; neither candidate contains
    // a dot itself, so this is the same test as a `.tsx` / `.jsx` suffix check.
    matches!(path.rsplit_once('.'), Some((_, "tsx" | "jsx")))
}
25
26impl LanguageParser for TypeScriptParser {
27    fn extensions(&self) -> &[&str] {
28        &["ts", "tsx", "js", "jsx", "mjs", "cjs"]
29    }
30
31    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
32        // TSX/JSX files must use the TSX grammar; TypeScript grammar rejects JSX syntax
33        // and produces error nodes with wrong line positions for every JSX element.
34        let language = if is_jsx_extension(&file.relative_path) {
35            tree_sitter_typescript::language_tsx()
36        } else {
37            tree_sitter_typescript::language_typescript()
38        };
39
40        let mut parser = Parser::new();
41        parser.set_language(&language)?;
42
43        let tree = parser
44            .parse(&file.content, None)
45            .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
46
47        let source_bytes = file.content.as_bytes();
48        let root = tree.root_node();
49        let mut nodes = Vec::new();
50        let mut edges = Vec::new();
51
52        let fp = file_node_id(&file.relative_path);
53
54        // Parse function declarations
55        if let Ok(query) = Query::new(
56            &language,
57            "(function_declaration name: (identifier) @name) @fn",
58        ) {
59            extract_nodes(
60                &mut nodes,
61                &mut edges,
62                file,
63                &query,
64                root,
65                source_bytes,
66                NodeKind::Function,
67                "fn",
68                &fp,
69            );
70        }
71
72        // Parse arrow functions / variable declarations with arrow
73        if let Ok(query) = Query::new(
74            &language,
75            "(variable_declarator name: (identifier) @name value: (arrow_function) @fn)",
76        ) {
77            extract_nodes(
78                &mut nodes,
79                &mut edges,
80                file,
81                &query,
82                root,
83                source_bytes,
84                NodeKind::Function,
85                "fn",
86                &fp,
87            );
88        }
89
90        // Parse variable declarations with function expressions
91        if let Ok(query) = Query::new(
92            &language,
93            "(variable_declarator name: (identifier) @name value: (function_expression) @fn)",
94        ) {
95            extract_nodes(
96                &mut nodes,
97                &mut edges,
98                file,
99                &query,
100                root,
101                source_bytes,
102                NodeKind::Function,
103                "fn",
104                &fp,
105            );
106        }
107
108        // Parse class declarations
109        if let Ok(query) = Query::new(
110            &language,
111            "(class_declaration name: (type_identifier) @name) @cls",
112        ) {
113            extract_nodes(
114                &mut nodes,
115                &mut edges,
116                file,
117                &query,
118                root,
119                source_bytes,
120                NodeKind::Class,
121                "cls",
122                &fp,
123            );
124        }
125
126        // Parse method definitions
127        if let Ok(query) = Query::new(
128            &language,
129            "(method_definition name: (property_identifier) @name) @m",
130        ) {
131            extract_nodes(
132                &mut nodes,
133                &mut edges,
134                file,
135                &query,
136                root,
137                source_bytes,
138                NodeKind::Function,
139                "fn",
140                &fp,
141            );
142        }
143
144        // Parse imports — walk the tree directly to find import statements
145        extract_imports(&mut edges, root, source_bytes, &fp, file);
146
147        // Parse exports
148        if let Ok(query) = Query::new(
149            &language,
150            "(export_statement (function_declaration name: (identifier) @name) @expr)",
151        ) {
152            process_exports(
153                &mut nodes,
154                &mut edges,
155                file,
156                &query,
157                root,
158                source_bytes,
159                &fp,
160                "fn",
161            );
162        }
163
164        if let Ok(query) = Query::new(
165            &language,
166            "(export_statement (class_declaration name: (type_identifier) @name) @expr)",
167        ) {
168            process_exports(
169                &mut nodes,
170                &mut edges,
171                file,
172                &query,
173                root,
174                source_bytes,
175                &fp,
176                "cls",
177            );
178        }
179
180        // Extract CALLS edges by walking the AST with function context tracking
181        extract_calls(&mut edges, root, source_bytes, file);
182
183        // Mark exported nodes based on export statements
184        let exported_names = collect_exported_names(root, source_bytes);
185        for node in &mut nodes {
186            if exported_names.contains(&node.name) {
187                node.metadata = serde_json::json!({"exported": true});
188            }
189        }
190
191        // Extract JSX expression comments and annotation tags (TODO/FIXME/etc.)
192        let mut comment_tags = Vec::new();
193        extract_jsx_comments(&mut comment_tags, root, source_bytes, false);
194
195        Ok(ParseResult {
196            nodes,
197            edges,
198            comment_tags,
199        })
200    }
201}
202
203fn collect_exported_names(
204    root: tree_sitter::Node,
205    source_bytes: &[u8],
206) -> std::collections::HashSet<String> {
207    let mut exported = std::collections::HashSet::new();
208    collect_exported_names_walk(root, source_bytes, &mut exported);
209    exported
210}
211
212fn collect_exported_names_walk(
213    node: tree_sitter::Node,
214    source_bytes: &[u8],
215    exported: &mut std::collections::HashSet<String>,
216) {
217    if node.kind() == "export_statement" {
218        // Walk children to find identifiers/function names
219        for i in 0..node.child_count() {
220            if let Some(child) = node.child(i) {
221                match child.kind() {
222                    "function_declaration" | "class_declaration" => {
223                        if let Some(name_node) = child.child_by_field_name("name") {
224                            exported.insert(node_text(name_node, source_bytes));
225                        }
226                    }
227                    "variable_declaration" => {
228                        // export const foo = ...
229                        for j in 0..child.child_count() {
230                            if let Some(decl) = child.child(j) {
231                                if decl.kind() == "variable_declarator" {
232                                    if let Some(name_node) = decl.child_by_field_name("name") {
233                                        exported.insert(node_text(name_node, source_bytes));
234                                    }
235                                }
236                            }
237                        }
238                    }
239                    "export_clause" => {
240                        // export { foo, bar }
241                        for j in 0..child.child_count() {
242                            if let Some(spec) = child.child(j) {
243                                if spec.kind() == "export_specifier" {
244                                    if let Some(name_node) = spec.child_by_field_name("name") {
245                                        exported.insert(node_text(name_node, source_bytes));
246                                    }
247                                }
248                            }
249                        }
250                    }
251                    _ => {}
252                }
253            }
254        }
255    }
256    // recurse
257    for i in 0..node.child_count() {
258        if let Some(child) = node.child(i) {
259            collect_exported_names_walk(child, source_bytes, exported);
260        }
261    }
262}
263
/// Builds the graph id of a file node: the literal prefix `file:` followed by
/// the file's relative path.
fn file_node_id(rel_path: &str) -> String {
    let mut id = String::with_capacity("file:".len() + rel_path.len());
    id.push_str("file:");
    id.push_str(rel_path);
    id
}
267
268#[allow(clippy::too_many_arguments)]
269fn extract_nodes(
270    nodes: &mut Vec<NodeDef>,
271    edges: &mut Vec<EdgeDef>,
272    file: &SourceFile,
273    query: &Query,
274    root: tree_sitter::Node,
275    source_bytes: &[u8],
276    kind: NodeKind,
277    prefix: &str,
278    file_id: &str,
279) {
280    let mut cursor = QueryCursor::new();
281    for m in cursor.matches(query, root, source_bytes) {
282        let Some(name_capture) = m
283            .captures
284            .iter()
285            .find(|c| query.capture_names()[c.index as usize] == "name")
286        else {
287            continue;
288        };
289
290        let name = unquote_str(&source_bytes[name_capture.node.byte_range()]);
291        let node_start = name_capture.node.start_position();
292
293        // Use the body node's end position so the snippet covers the full function/class body
294        let body_end = m
295            .captures
296            .iter()
297            .find(|c| {
298                let cap_name = &query.capture_names()[c.index as usize];
299                *cap_name == "fn" || *cap_name == "cls" || *cap_name == "m"
300            })
301            .map(|c| c.node.end_position())
302            .unwrap_or_else(|| name_capture.node.end_position());
303
304        let Some(_fn_capture) = m.captures.iter().find(|c| {
305            let cap_name = &query.capture_names()[c.index as usize];
306            *cap_name == "fn" || *cap_name == "cls" || *cap_name == "m"
307        }) else {
308            continue;
309        };
310
311        let id = format!("{}:{}:{}", prefix, file.relative_path, name);
312
313        nodes.push(NodeDef {
314            id,
315            kind: kind.clone(),
316            name,
317            path: file.relative_path.clone(),
318            line_start: node_start.row as u32 + 1,
319            line_end: body_end.row as u32 + 1,
320            ..Default::default()
321        });
322
323        edges.push(EdgeDef {
324            src: file_id.to_string(),
325            dst: format!(
326                "{}:{}:{}",
327                prefix,
328                file.relative_path,
329                unquote_str(&source_bytes[name_capture.node.byte_range()])
330            ),
331            kind: EdgeKind::Exports,
332            ..Default::default()
333        });
334    }
335}
336
337#[allow(clippy::too_many_arguments)]
338fn process_exports(
339    _nodes: &mut Vec<NodeDef>,
340    edges: &mut Vec<EdgeDef>,
341    file: &SourceFile,
342    query: &Query,
343    root: tree_sitter::Node,
344    source_bytes: &[u8],
345    file_id: &str,
346    prefix: &str,
347) {
348    let mut cursor = QueryCursor::new();
349    for m in cursor.matches(query, root, source_bytes) {
350        let Some(name_capture) = m
351            .captures
352            .iter()
353            .find(|c| query.capture_names()[c.index as usize] == "name")
354        else {
355            continue;
356        };
357
358        let name = node_text(name_capture.node, source_bytes);
359
360        edges.push(EdgeDef {
361            src: file_id.to_string(),
362            dst: format!("{}:{}:{}", prefix, file.relative_path, name),
363            kind: EdgeKind::Exports,
364            ..Default::default()
365        });
366    }
367}
368
369fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
370    node.utf8_text(source).unwrap_or("").to_string()
371}
372
373fn extract_imports(
374    edges: &mut Vec<EdgeDef>,
375    root: tree_sitter::Node,
376    source_bytes: &[u8],
377    file_id: &str,
378    file: &SourceFile,
379) {
380    // Walk the entire tree (not just root children) to find imports and requires
381    let mut cursor = root.walk();
382    traverse_imports(edges, root, source_bytes, file_id, file, &mut cursor);
383}
384
385fn traverse_imports(
386    edges: &mut Vec<EdgeDef>,
387    node: tree_sitter::Node,
388    source_bytes: &[u8],
389    file_id: &str,
390    file: &SourceFile,
391    cursor: &mut tree_sitter::TreeCursor,
392) {
393    if node.kind() == "import_statement" {
394        for j in 0..node.child_count() {
395            let Some(import_child) = node.child(j) else {
396                continue;
397            };
398            if import_child.kind() == "string" {
399                let import_path = unquote_str(&source_bytes[import_child.byte_range()]);
400                if import_path.starts_with('.') {
401                    let resolved = resolve_import_path(&file.relative_path, &import_path);
402                    if !resolved.is_empty() {
403                        edges.push(EdgeDef {
404                            src: file_id.to_string(),
405                            dst: file_node_id(&resolved),
406                            kind: EdgeKind::Imports,
407                            ..Default::default()
408                        });
409                    }
410                }
411                break;
412            }
413        }
414    } else if node.kind() == "call_expression" {
415        // Check for require('...')
416        if let Some(func) = node.child_by_field_name("function") {
417            if func.kind() == "identifier" && node_text(func, source_bytes) == "require" {
418                if let Some(args) = node.child_by_field_name("arguments") {
419                    for k in 0..args.child_count() {
420                        let Some(arg) = args.child(k) else { continue };
421                        if arg.kind() == "string" {
422                            let import_path = unquote_str(&source_bytes[arg.byte_range()]);
423                            if import_path.starts_with('.') {
424                                let resolved =
425                                    resolve_import_path(&file.relative_path, &import_path);
426                                if !resolved.is_empty() {
427                                    edges.push(EdgeDef {
428                                        src: file_id.to_string(),
429                                        dst: file_node_id(&resolved),
430                                        kind: EdgeKind::Imports,
431                                        ..Default::default()
432                                    });
433                                }
434                            }
435                            break;
436                        }
437                    }
438                }
439            }
440        }
441    }
442
443    if cursor.goto_first_child() {
444        loop {
445            let child = cursor.node();
446            traverse_imports(edges, child, source_bytes, file_id, file, cursor);
447            if !cursor.goto_next_sibling() {
448                break;
449            }
450        }
451        cursor.goto_parent();
452    }
453}
454
/// Decodes `s` as UTF-8 and strips surrounding whitespace, then any leading and
/// trailing single quotes, then any leading and trailing double quotes.
/// Invalid UTF-8 yields an empty string.
fn unquote_str(s: &[u8]) -> String {
    let Ok(text) = std::str::from_utf8(s) else {
        return String::new();
    };
    // Order matters: single quotes are peeled before double quotes.
    let without_single = text.trim().trim_matches('\'');
    without_single.trim_matches('"').to_owned()
}
459
/// Resolves a relative module specifier against the importing file's path.
///
/// `current` is the `/`-separated path of the importing file; its last
/// component (the file name) is dropped to obtain the base directory. Each
/// segment of `import` is then applied in turn:
///
/// - `.` and empty segments are ignored, so `./`, `../` and `a//b` do not
///   leave a trailing or doubled `/` in the result,
/// - `..` removes the last directory (and is a no-op once nothing is left),
/// - anything else is appended.
///
/// The result is joined with `/`. No file extension is added.
fn resolve_import_path(current: &str, import: &str) -> String {
    let mut parts: Vec<&str> = current.split('/').collect();
    parts.pop(); // remove filename

    for segment in import.split('/') {
        match segment {
            "" | "." => {}
            ".." => {
                parts.pop();
            }
            other => parts.push(other),
        }
    }

    parts.join("/")
}
476
477/// Walk the AST tracking function context, emitting CALLS edges for each call_expression.
478fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
479    let mut fn_stack: Vec<String> = Vec::new();
480    walk_for_calls(edges, root, source, file, &mut fn_stack);
481}
482
/// Returns `true` when `kind` names a syntax node that introduces a function
/// scope for call attribution.
///
/// `function_expression` is included so that `const f = function () { ... }`
/// opens a scope, matching the `(function_expression)` query used when
/// declarations are extracted. `function` is kept alongside it — presumably the
/// name older grammar versions gave the same construct (TODO confirm).
fn is_fn_node(kind: &str) -> bool {
    matches!(
        kind,
        "function_declaration"
            | "function"
            | "function_expression"
            | "arrow_function"
            | "method_definition"
            | "generator_function_declaration"
            | "generator_function"
    )
}
494
495fn fn_name_from_node<'a>(node: Node<'a>, source: &[u8], file: &SourceFile) -> Option<String> {
496    // function_declaration / generator_function_declaration: has `name` field
497    if let Some(name_node) = node.child_by_field_name("name") {
498        let name = name_node.utf8_text(source).unwrap_or("").to_string();
499        if !name.is_empty() {
500            return Some(format!("fn:{}:{}", file.relative_path, name));
501        }
502    }
503    // Arrow/anonymous assigned to variable: look at parent variable_declarator
504    let parent = node.parent()?;
505    if parent.kind() == "variable_declarator" {
506        if let Some(name_node) = parent.child_by_field_name("name") {
507            let name = name_node.utf8_text(source).unwrap_or("").to_string();
508            if !name.is_empty() {
509                return Some(format!("fn:{}:{}", file.relative_path, name));
510            }
511        }
512    }
513    None
514}
515
516fn walk_for_calls(
517    edges: &mut Vec<EdgeDef>,
518    node: Node,
519    source: &[u8],
520    file: &SourceFile,
521    fn_stack: &mut Vec<String>,
522) {
523    let kind = node.kind();
524    let pushed = is_fn_node(kind);
525
526    if pushed {
527        if let Some(id) = fn_name_from_node(node, source, file) {
528            fn_stack.push(id);
529        } else {
530            // anonymous — push a sentinel so pop is balanced
531            fn_stack.push(String::new());
532        }
533    }
534
535    // Effective caller: innermost named function, or the file node for module-level code
536    let caller_id: Option<String> = fn_stack
537        .iter()
538        .rev()
539        .find(|s| !s.is_empty())
540        .cloned()
541        .or_else(|| Some(format!("file:{}", file.relative_path)));
542
543    if kind == "call_expression" {
544        if let Some(ref caller) = caller_id {
545            let func_node = node.child_by_field_name("function");
546            let callee_name = func_node
547                .as_ref()
548                .and_then(|func| match func.kind() {
549                    "identifier" => Some(func.utf8_text(source).unwrap_or("").to_string()),
550                    "member_expression" => func
551                        .child_by_field_name("property")
552                        .map(|p| p.utf8_text(source).unwrap_or("").to_string()),
553                    _ => None,
554                })
555                .unwrap_or_default();
556
557            if !callee_name.is_empty() && callee_name != "require" {
558                edges.push(EdgeDef {
559                    src: caller.clone(),
560                    dst: callee_name,
561                    kind: EdgeKind::Calls,
562                    confidence: 0.7,
563                    ..Default::default()
564                });
565            }
566
567            // For `Obj.method()` also emit a CALLS edge to the object identifier so
568            // classes used only via static methods aren't flagged as dead code.
569            if let Some(func) = func_node {
570                if func.kind() == "member_expression" {
571                    if let Some(obj) = func.child_by_field_name("object") {
572                        if obj.kind() == "identifier" {
573                            let obj_name = obj.utf8_text(source).unwrap_or("").to_string();
574                            if !obj_name.is_empty() {
575                                edges.push(EdgeDef {
576                                    src: caller.clone(),
577                                    dst: obj_name,
578                                    kind: EdgeKind::Calls,
579                                    confidence: 0.6,
580                                    ..Default::default()
581                                });
582                            }
583                        }
584                    }
585                }
586            }
587        }
588    }
589
590    // new_expression: `new ClassName(...)` — emit CALLS edge to the constructor
591    if kind == "new_expression" {
592        if let Some(ref caller) = caller_id {
593            let constructor_name = node
594                .child_by_field_name("constructor")
595                .and_then(|c| match c.kind() {
596                    "identifier" => Some(c.utf8_text(source).unwrap_or("").to_string()),
597                    "member_expression" => c
598                        .child_by_field_name("property")
599                        .map(|p| p.utf8_text(source).unwrap_or("").to_string()),
600                    _ => None,
601                })
602                .unwrap_or_default();
603
604            if !constructor_name.is_empty() {
605                edges.push(EdgeDef {
606                    src: caller.clone(),
607                    dst: constructor_name,
608                    kind: EdgeKind::Calls,
609                    confidence: 0.7,
610                    ..Default::default()
611                });
612            }
613        }
614    }
615
616    // JSX component usage: <ComponentName ... /> and <ComponentName ...>
617    // Treat JSX elements as calls from the enclosing function to the component.
618    if kind == "jsx_opening_element" || kind == "jsx_self_closing_element" {
619        if let Some(ref caller_id) = caller_id {
620            let tag_name = node
621                .child_by_field_name("name")
622                .map(|n| n.utf8_text(source).unwrap_or("").to_string())
623                .unwrap_or_default();
624
625            // Only emit edges for PascalCase or camelCase user-defined components.
626            // Lowercase tags like <div>, <span> are HTML intrinsics — skip them.
627            let is_component = tag_name
628                .chars()
629                .next()
630                .map(|c| {
631                    c.is_uppercase()
632                        || (c.is_lowercase() && tag_name.len() > 3 && tag_name.contains('.'))
633                })
634                .unwrap_or(false);
635
636            if is_component {
637                // Strip member access for <Namespace.Component /> — use only the last segment
638                let callee = tag_name
639                    .split('.')
640                    .next_back()
641                    .unwrap_or(&tag_name)
642                    .to_string();
643                edges.push(EdgeDef {
644                    src: caller_id.clone(),
645                    dst: callee,
646                    kind: EdgeKind::Calls,
647                    confidence: 0.6,
648                    ..Default::default()
649                });
650            }
651        }
652    }
653
654    let mut cursor = node.walk();
655    if cursor.goto_first_child() {
656        loop {
657            walk_for_calls(edges, cursor.node(), source, file, fn_stack);
658            if !cursor.goto_next_sibling() {
659                break;
660            }
661        }
662    }
663
664    if pushed {
665        fn_stack.pop();
666    }
667}
668
/// Annotation keywords looked for in comments, in priority order: when a
/// comment contains several of them, the earliest entry in this list wins.
const ANNOTATION_TAGS: &[&str] = &[
    "TODO",
    "FIXME",
    "HACK",
    "NOTE",
    "BUG",
    "OPTIMIZE",
    "WARN",
    "XXX",
];
672
673/// Recursively walk the AST extracting annotation comments (TODO/FIXME/etc.).
674/// `in_jsx_expression` tracks whether we are inside a `jsx_expression` node,
675/// which is how `{/* ... */}` comments appear in the TSX grammar.
676fn extract_jsx_comments(
677    tags: &mut Vec<CommentTag>,
678    node: Node,
679    source: &[u8],
680    in_jsx_expression: bool,
681) {
682    let kind = node.kind();
683
684    // Track whether we're entering a jsx_expression wrapper
685    let now_in_jsx = in_jsx_expression || kind == "jsx_expression";
686
687    if kind == "comment" {
688        let raw = node.utf8_text(source).unwrap_or("").trim();
689
690        let comment_kind = if in_jsx_expression {
691            // Strip `/*` / `*/` delimiters and check for commented-out JSX code
692            let inner = raw.trim_start_matches("/*").trim_end_matches("*/").trim();
693            if inner.starts_with('<') || inner.contains("</") || inner.contains("/>") {
694                CommentKind::JsxCommentedCode
695            } else {
696                CommentKind::JsxExpression
697            }
698        } else {
699            CommentKind::Standard
700        };
701
702        let upper = raw.to_uppercase();
703        for &tag in ANNOTATION_TAGS {
704            if upper.contains(tag) {
705                tags.push(CommentTag {
706                    tag_type: tag.to_string(),
707                    text: raw.to_string(),
708                    line: node.start_position().row as u32 + 1,
709                    comment_kind: comment_kind.clone(),
710                });
711                break;
712            }
713        }
714    }
715
716    let mut cursor = node.walk();
717    if cursor.goto_first_child() {
718        loop {
719            extract_jsx_comments(tags, cursor.node(), source, now_in_jsx);
720            if !cursor.goto_next_sibling() {
721                break;
722            }
723        }
724    }
725}