// cgx_engine/parsers/ts.rs

1use tree_sitter::{Node, Parser, Query, QueryCursor};
2
3use crate::parser::{
4    CommentKind, CommentTag, EdgeDef, EdgeKind, LanguageParser, NodeDef, NodeKind, ParseResult,
5};
6use crate::walker::SourceFile;
7
/// Tree-sitter based extractor for TypeScript and JavaScript source files.
///
/// The type carries no state; it is a zero-sized handle.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct TypeScriptParser;

impl TypeScriptParser {
    /// Creates a new parser handle. Equivalent to `TypeScriptParser::default()`.
    pub const fn new() -> Self {
        Self
    }
}
21
/// Returns `true` when `path` ends in a `.tsx` or `.jsx` extension.
///
/// The comparison ignores ASCII case so that `Widget.TSX` is treated the same
/// as `Widget.tsx`. A path without any `.` never matches.
fn is_jsx_extension(path: &str) -> bool {
    path.rsplit_once('.').map_or(false, |(_, ext)| {
        ext.eq_ignore_ascii_case("tsx") || ext.eq_ignore_ascii_case("jsx")
    })
}
25
26impl LanguageParser for TypeScriptParser {
27    fn extensions(&self) -> &[&str] {
28        &["ts", "tsx", "js", "jsx", "mjs", "cjs"]
29    }
30
31    fn extract(&self, file: &SourceFile) -> anyhow::Result<ParseResult> {
32        // TSX/JSX files must use the TSX grammar; TypeScript grammar rejects JSX syntax
33        // and produces error nodes with wrong line positions for every JSX element.
34        let language = if is_jsx_extension(&file.relative_path) {
35            tree_sitter_typescript::language_tsx()
36        } else {
37            tree_sitter_typescript::language_typescript()
38        };
39
40        let mut parser = Parser::new();
41        parser.set_language(&language)?;
42
43        let tree = parser
44            .parse(&file.content, None)
45            .ok_or_else(|| anyhow::anyhow!("failed to parse {}", file.relative_path))?;
46
47        let source_bytes = file.content.as_bytes();
48        let root = tree.root_node();
49        let mut nodes = Vec::new();
50        let mut edges = Vec::new();
51
52        let fp = file_node_id(&file.relative_path);
53
54        // Parse function declarations
55        if let Ok(query) = Query::new(
56            &language,
57            "(function_declaration name: (identifier) @name) @fn",
58        ) {
59            extract_nodes(
60                &mut nodes,
61                &mut edges,
62                file,
63                &query,
64                root,
65                source_bytes,
66                NodeKind::Function,
67                "fn",
68                &fp,
69            );
70        }
71
72        // Parse arrow functions / variable declarations with arrow
73        if let Ok(query) = Query::new(
74            &language,
75            "(variable_declarator name: (identifier) @name value: (arrow_function) @fn)",
76        ) {
77            extract_nodes(
78                &mut nodes,
79                &mut edges,
80                file,
81                &query,
82                root,
83                source_bytes,
84                NodeKind::Function,
85                "fn",
86                &fp,
87            );
88        }
89
90        // Parse variable declarations with function expressions
91        if let Ok(query) = Query::new(
92            &language,
93            "(variable_declarator name: (identifier) @name value: (function_expression) @fn)",
94        ) {
95            extract_nodes(
96                &mut nodes,
97                &mut edges,
98                file,
99                &query,
100                root,
101                source_bytes,
102                NodeKind::Function,
103                "fn",
104                &fp,
105            );
106        }
107
108        // Parse class declarations
109        if let Ok(query) = Query::new(
110            &language,
111            "(class_declaration name: (type_identifier) @name) @cls",
112        ) {
113            extract_nodes(
114                &mut nodes,
115                &mut edges,
116                file,
117                &query,
118                root,
119                source_bytes,
120                NodeKind::Class,
121                "cls",
122                &fp,
123            );
124        }
125
126        // Parse method definitions
127        if let Ok(query) = Query::new(
128            &language,
129            "(method_definition name: (property_identifier) @name) @m",
130        ) {
131            extract_nodes(
132                &mut nodes,
133                &mut edges,
134                file,
135                &query,
136                root,
137                source_bytes,
138                NodeKind::Function,
139                "fn",
140                &fp,
141            );
142        }
143
144        // Parse imports — walk the tree directly to find import statements
145        extract_imports(&mut edges, root, source_bytes, &fp, file);
146
147        // Parse exports
148        if let Ok(query) = Query::new(
149            &language,
150            "(export_statement (function_declaration name: (identifier) @name) @expr)",
151        ) {
152            process_exports(
153                &mut nodes,
154                &mut edges,
155                file,
156                &query,
157                root,
158                source_bytes,
159                &fp,
160                "fn",
161            );
162        }
163
164        if let Ok(query) = Query::new(
165            &language,
166            "(export_statement (class_declaration name: (type_identifier) @name) @expr)",
167        ) {
168            process_exports(
169                &mut nodes,
170                &mut edges,
171                file,
172                &query,
173                root,
174                source_bytes,
175                &fp,
176                "cls",
177            );
178        }
179
180        // Extract CALLS edges by walking the AST with function context tracking
181        extract_calls(&mut edges, root, source_bytes, file);
182
183        // Extract JSX expression comments and annotation tags (TODO/FIXME/etc.)
184        let mut comment_tags = Vec::new();
185        extract_jsx_comments(&mut comment_tags, root, source_bytes, false);
186
187        Ok(ParseResult {
188            nodes,
189            edges,
190            comment_tags,
191        })
192    }
193}
194
/// Builds the graph id of a file node: the relative path prefixed with `file:`.
fn file_node_id(rel_path: &str) -> String {
    format!("file:{}", rel_path)
}
198
199#[allow(clippy::too_many_arguments)]
200fn extract_nodes(
201    nodes: &mut Vec<NodeDef>,
202    edges: &mut Vec<EdgeDef>,
203    file: &SourceFile,
204    query: &Query,
205    root: tree_sitter::Node,
206    source_bytes: &[u8],
207    kind: NodeKind,
208    prefix: &str,
209    file_id: &str,
210) {
211    let mut cursor = QueryCursor::new();
212    for m in cursor.matches(query, root, source_bytes) {
213        let Some(name_capture) = m
214            .captures
215            .iter()
216            .find(|c| query.capture_names()[c.index as usize] == "name")
217        else {
218            continue;
219        };
220
221        let name = unquote_str(&source_bytes[name_capture.node.byte_range()]);
222        let node_start = name_capture.node.start_position();
223
224        // Use the body node's end position so the snippet covers the full function/class body
225        let body_end = m
226            .captures
227            .iter()
228            .find(|c| {
229                let cap_name = &query.capture_names()[c.index as usize];
230                *cap_name == "fn" || *cap_name == "cls" || *cap_name == "m"
231            })
232            .map(|c| c.node.end_position())
233            .unwrap_or_else(|| name_capture.node.end_position());
234
235        let Some(_fn_capture) = m.captures.iter().find(|c| {
236            let cap_name = &query.capture_names()[c.index as usize];
237            *cap_name == "fn" || *cap_name == "cls" || *cap_name == "m"
238        }) else {
239            continue;
240        };
241
242        let id = format!("{}:{}:{}", prefix, file.relative_path, name);
243
244        nodes.push(NodeDef {
245            id,
246            kind: kind.clone(),
247            name,
248            path: file.relative_path.clone(),
249            line_start: node_start.row as u32 + 1,
250            line_end: body_end.row as u32 + 1,
251            ..Default::default()
252        });
253
254        edges.push(EdgeDef {
255            src: file_id.to_string(),
256            dst: format!(
257                "{}:{}:{}",
258                prefix,
259                file.relative_path,
260                unquote_str(&source_bytes[name_capture.node.byte_range()])
261            ),
262            kind: EdgeKind::Exports,
263            ..Default::default()
264        });
265    }
266}
267
268#[allow(clippy::too_many_arguments)]
269fn process_exports(
270    _nodes: &mut Vec<NodeDef>,
271    edges: &mut Vec<EdgeDef>,
272    file: &SourceFile,
273    query: &Query,
274    root: tree_sitter::Node,
275    source_bytes: &[u8],
276    file_id: &str,
277    prefix: &str,
278) {
279    let mut cursor = QueryCursor::new();
280    for m in cursor.matches(query, root, source_bytes) {
281        let Some(name_capture) = m
282            .captures
283            .iter()
284            .find(|c| query.capture_names()[c.index as usize] == "name")
285        else {
286            continue;
287        };
288
289        let name = node_text(name_capture.node, source_bytes);
290
291        edges.push(EdgeDef {
292            src: file_id.to_string(),
293            dst: format!("{}:{}:{}", prefix, file.relative_path, name),
294            kind: EdgeKind::Exports,
295            ..Default::default()
296        });
297    }
298}
299
300fn node_text(node: tree_sitter::Node, source: &[u8]) -> String {
301    node.utf8_text(source).unwrap_or("").to_string()
302}
303
304fn extract_imports(
305    edges: &mut Vec<EdgeDef>,
306    root: tree_sitter::Node,
307    source_bytes: &[u8],
308    file_id: &str,
309    file: &SourceFile,
310) {
311    // Walk the entire tree (not just root children) to find imports and requires
312    let mut cursor = root.walk();
313    traverse_imports(edges, root, source_bytes, file_id, file, &mut cursor);
314}
315
316fn traverse_imports(
317    edges: &mut Vec<EdgeDef>,
318    node: tree_sitter::Node,
319    source_bytes: &[u8],
320    file_id: &str,
321    file: &SourceFile,
322    cursor: &mut tree_sitter::TreeCursor,
323) {
324    if node.kind() == "import_statement" {
325        for j in 0..node.child_count() {
326            let Some(import_child) = node.child(j) else {
327                continue;
328            };
329            if import_child.kind() == "string" {
330                let import_path = unquote_str(&source_bytes[import_child.byte_range()]);
331                if import_path.starts_with('.') {
332                    let resolved = resolve_import_path(&file.relative_path, &import_path);
333                    if !resolved.is_empty() {
334                        edges.push(EdgeDef {
335                            src: file_id.to_string(),
336                            dst: file_node_id(&resolved),
337                            kind: EdgeKind::Imports,
338                            ..Default::default()
339                        });
340                    }
341                }
342                break;
343            }
344        }
345    } else if node.kind() == "call_expression" {
346        // Check for require('...')
347        if let Some(func) = node.child_by_field_name("function") {
348            if func.kind() == "identifier" && node_text(func, source_bytes) == "require" {
349                if let Some(args) = node.child_by_field_name("arguments") {
350                    for k in 0..args.child_count() {
351                        let Some(arg) = args.child(k) else { continue };
352                        if arg.kind() == "string" {
353                            let import_path = unquote_str(&source_bytes[arg.byte_range()]);
354                            if import_path.starts_with('.') {
355                                let resolved =
356                                    resolve_import_path(&file.relative_path, &import_path);
357                                if !resolved.is_empty() {
358                                    edges.push(EdgeDef {
359                                        src: file_id.to_string(),
360                                        dst: file_node_id(&resolved),
361                                        kind: EdgeKind::Imports,
362                                        ..Default::default()
363                                    });
364                                }
365                            }
366                            break;
367                        }
368                    }
369                }
370            }
371        }
372    }
373
374    if cursor.goto_first_child() {
375        loop {
376            let child = cursor.node();
377            traverse_imports(edges, child, source_bytes, file_id, file, cursor);
378            if !cursor.goto_next_sibling() {
379                break;
380            }
381        }
382        cursor.goto_parent();
383    }
384}
385
/// Decodes `s` as UTF-8, trims surrounding whitespace and removes the quotes
/// of a string literal.
///
/// A well-formed literal (`'...'` or `"..."`) loses exactly one pair of
/// delimiters, so quote characters that belong to the content survive.
/// Anything else falls back to trimming all leading/trailing `'` and then `"`
/// characters. Invalid UTF-8 yields an empty string.
fn unquote_str(s: &[u8]) -> String {
    let text = std::str::from_utf8(s).unwrap_or("").trim();

    for quote in ['\'', '"'] {
        let inner = text
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner.to_string();
        }
    }

    // Not a properly delimited literal (identifier, unbalanced quotes, ...).
    text.trim_matches('\'').trim_matches('"').to_string()
}
390
/// Resolves the relative specifier `import` against the directory of
/// `current` and returns the normalized `/`-separated path.
///
/// * `.` and empty segments (from `./dir/` or `a//b`) are ignored.
/// * `..` moves one directory up.
///
/// Returns an empty string when the result is the root directory itself or
/// when `..` would climb above the root. No file extension is appended.
fn resolve_import_path(current: &str, import: &str) -> String {
    let mut parts: Vec<&str> = current.split('/').collect();
    parts.pop(); // remove filename

    for segment in import.split('/') {
        match segment {
            "" | "." => {}
            ".." => {
                // Climbing past the root would alias an unrelated in-root path.
                if parts.pop().is_none() {
                    return String::new();
                }
            }
            _ => parts.push(segment),
        }
    }

    parts.join("/")
}
407
408/// Walk the AST tracking function context, emitting CALLS edges for each call_expression.
409fn extract_calls(edges: &mut Vec<EdgeDef>, root: Node, source: &[u8], file: &SourceFile) {
410    let mut fn_stack: Vec<String> = Vec::new();
411    walk_for_calls(edges, root, source, file, &mut fn_stack);
412}
413
/// Returns `true` when `kind` is a tree-sitter node kind that introduces a
/// function scope.
///
/// Both `function` and `function_expression` are accepted because the node
/// kind for anonymous function expressions differs between grammar versions.
fn is_fn_node(kind: &str) -> bool {
    matches!(
        kind,
        "function_declaration"
            | "function"
            | "function_expression"
            | "arrow_function"
            | "method_definition"
            | "generator_function_declaration"
            | "generator_function"
    )
}
425
426fn fn_name_from_node<'a>(node: Node<'a>, source: &[u8], file: &SourceFile) -> Option<String> {
427    // function_declaration / generator_function_declaration: has `name` field
428    if let Some(name_node) = node.child_by_field_name("name") {
429        let name = name_node.utf8_text(source).unwrap_or("").to_string();
430        if !name.is_empty() {
431            return Some(format!("fn:{}:{}", file.relative_path, name));
432        }
433    }
434    // Arrow/anonymous assigned to variable: look at parent variable_declarator
435    let parent = node.parent()?;
436    if parent.kind() == "variable_declarator" {
437        if let Some(name_node) = parent.child_by_field_name("name") {
438            let name = name_node.utf8_text(source).unwrap_or("").to_string();
439            if !name.is_empty() {
440                return Some(format!("fn:{}:{}", file.relative_path, name));
441            }
442        }
443    }
444    None
445}
446
447fn walk_for_calls(
448    edges: &mut Vec<EdgeDef>,
449    node: Node,
450    source: &[u8],
451    file: &SourceFile,
452    fn_stack: &mut Vec<String>,
453) {
454    let kind = node.kind();
455    let pushed = is_fn_node(kind);
456
457    if pushed {
458        if let Some(id) = fn_name_from_node(node, source, file) {
459            fn_stack.push(id);
460        } else {
461            // anonymous — push a sentinel so pop is balanced
462            fn_stack.push(String::new());
463        }
464    }
465
466    if kind == "call_expression" {
467        if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
468            let callee_name = node
469                .child_by_field_name("function")
470                .and_then(|func| match func.kind() {
471                    "identifier" => Some(func.utf8_text(source).unwrap_or("").to_string()),
472                    "member_expression" => func
473                        .child_by_field_name("property")
474                        .map(|p| p.utf8_text(source).unwrap_or("").to_string()),
475                    _ => None,
476                })
477                .unwrap_or_default();
478
479            if !callee_name.is_empty() && callee_name != "require" {
480                edges.push(EdgeDef {
481                    src: caller_id.clone(),
482                    dst: callee_name,
483                    kind: EdgeKind::Calls,
484                    confidence: 0.7,
485                    ..Default::default()
486                });
487            }
488        }
489    }
490
491    // JSX component usage: <ComponentName ... /> and <ComponentName ...>
492    // Treat JSX elements as calls from the enclosing function to the component.
493    if kind == "jsx_opening_element" || kind == "jsx_self_closing_element" {
494        if let Some(caller_id) = fn_stack.last().filter(|s| !s.is_empty()) {
495            let tag_name = node
496                .child_by_field_name("name")
497                .map(|n| n.utf8_text(source).unwrap_or("").to_string())
498                .unwrap_or_default();
499
500            // Only emit edges for PascalCase or camelCase user-defined components.
501            // Lowercase tags like <div>, <span> are HTML intrinsics — skip them.
502            let is_component = tag_name
503                .chars()
504                .next()
505                .map(|c| {
506                    c.is_uppercase()
507                        || (c.is_lowercase() && tag_name.len() > 3 && tag_name.contains('.'))
508                })
509                .unwrap_or(false);
510
511            if is_component {
512                // Strip member access for <Namespace.Component /> — use only the last segment
513                let callee = tag_name
514                    .split('.')
515                    .next_back()
516                    .unwrap_or(&tag_name)
517                    .to_string();
518                edges.push(EdgeDef {
519                    src: caller_id.clone(),
520                    dst: callee,
521                    kind: EdgeKind::Calls,
522                    confidence: 0.6,
523                    ..Default::default()
524                });
525            }
526        }
527    }
528
529    let mut cursor = node.walk();
530    if cursor.goto_first_child() {
531        loop {
532            walk_for_calls(edges, cursor.node(), source, file, fn_stack);
533            if !cursor.goto_next_sibling() {
534                break;
535            }
536        }
537    }
538
539    if pushed {
540        fn_stack.pop();
541    }
542}
543
544const ANNOTATION_TAGS: &[&str] = &[
545    "TODO", "FIXME", "HACK", "NOTE", "BUG", "OPTIMIZE", "WARN", "XXX",
546];
547
548/// Recursively walk the AST extracting annotation comments (TODO/FIXME/etc.).
549/// `in_jsx_expression` tracks whether we are inside a `jsx_expression` node,
550/// which is how `{/* ... */}` comments appear in the TSX grammar.
551fn extract_jsx_comments(
552    tags: &mut Vec<CommentTag>,
553    node: Node,
554    source: &[u8],
555    in_jsx_expression: bool,
556) {
557    let kind = node.kind();
558
559    // Track whether we're entering a jsx_expression wrapper
560    let now_in_jsx = in_jsx_expression || kind == "jsx_expression";
561
562    if kind == "comment" {
563        let raw = node.utf8_text(source).unwrap_or("").trim();
564
565        let comment_kind = if in_jsx_expression {
566            // Strip `/*` / `*/` delimiters and check for commented-out JSX code
567            let inner = raw.trim_start_matches("/*").trim_end_matches("*/").trim();
568            if inner.starts_with('<') || inner.contains("</") || inner.contains("/>") {
569                CommentKind::JsxCommentedCode
570            } else {
571                CommentKind::JsxExpression
572            }
573        } else {
574            CommentKind::Standard
575        };
576
577        let upper = raw.to_uppercase();
578        for &tag in ANNOTATION_TAGS {
579            if upper.contains(tag) {
580                tags.push(CommentTag {
581                    tag_type: tag.to_string(),
582                    text: raw.to_string(),
583                    line: node.start_position().row as u32 + 1,
584                    comment_kind: comment_kind.clone(),
585                });
586                break;
587            }
588        }
589    }
590
591    let mut cursor = node.walk();
592    if cursor.goto_first_child() {
593        loop {
594            extract_jsx_comments(tags, cursor.node(), source, now_in_jsx);
595            if !cursor.goto_next_sibling() {
596                break;
597            }
598        }
599    }
600}