Skip to main content

dk_engine/parser/
typescript_parser.rs

1use super::LanguageParser;
2use dk_core::{CallKind, Import, RawCallEdge, Result, Span, Symbol, SymbolKind, TypeInfo, Visibility};
3use std::path::Path;
4use tree_sitter::{Node, Parser, TreeCursor};
5use uuid::Uuid;
6
/// TypeScript/JavaScript parser backed by tree-sitter.
///
/// Extracts symbols, call edges, imports, and (stub) type information from
/// TypeScript, TSX, JavaScript, and JSX source files.
///
/// Uses the TSX grammar for all files since TSX is a superset of TypeScript.
///
/// The type is a unit struct and carries no state; each extraction call
/// parses its input from scratch with a freshly configured tree-sitter parser.
pub struct TypeScriptParser;
14
15impl TypeScriptParser {
16    pub fn new() -> Self {
17        Self
18    }
19
20    /// Create a configured tree-sitter parser for TypeScript (TSX superset).
21    fn create_parser() -> Result<Parser> {
22        let mut parser = Parser::new();
23        parser
24            .set_language(&tree_sitter_typescript::LANGUAGE_TSX.into())
25            .map_err(|e| {
26                dk_core::Error::ParseError(format!("Failed to load TypeScript grammar: {e}"))
27            })?;
28        Ok(parser)
29    }
30
31    /// Parse source bytes into a tree-sitter tree.
32    fn parse_tree(source: &[u8]) -> Result<tree_sitter::Tree> {
33        let mut parser = Self::create_parser()?;
34        parser
35            .parse(source, None)
36            .ok_or_else(|| dk_core::Error::ParseError("tree-sitter parse returned None".into()))
37    }
38
39    /// Get the text of a node as a UTF-8 string.
40    fn node_text<'a>(node: &Node, source: &'a [u8]) -> &'a str {
41        let text = &source[node.start_byte()..node.end_byte()];
42        std::str::from_utf8(text).unwrap_or("")
43    }
44
45    /// Extract the name from a node by looking for the `name` field.
46    fn node_name(node: &Node, source: &[u8]) -> Option<String> {
47        node.child_by_field_name("name")
48            .map(|n| Self::node_text(&n, source).to_string())
49    }
50
51    /// Extract the first line of the node's source text as the signature.
52    fn node_signature(node: &Node, source: &[u8]) -> Option<String> {
53        let text_str = Self::node_text(node, source);
54        let first_line = text_str.lines().next()?;
55        Some(first_line.trim().to_string())
56    }
57
58    /// Collect preceding `//` or `/** */` doc comments for a node.
59    fn doc_comments(node: &Node, source: &[u8]) -> Option<String> {
60        let mut comments = Vec::new();
61        let mut sibling = node.prev_sibling();
62
63        while let Some(prev) = sibling {
64            if prev.kind() == "comment" {
65                let text = Self::node_text(&prev, source).trim().to_string();
66                comments.push(text);
67                sibling = prev.prev_sibling();
68                continue;
69            }
70            break;
71        }
72
73        if comments.is_empty() {
74            None
75        } else {
76            comments.reverse();
77            Some(comments.join("\n"))
78        }
79    }
80
81    /// Map a tree-sitter node kind to our SymbolKind, if applicable.
82    fn map_symbol_kind(kind: &str) -> Option<SymbolKind> {
83        match kind {
84            "function_declaration" => Some(SymbolKind::Function),
85            "class_declaration" => Some(SymbolKind::Class),
86            "interface_declaration" => Some(SymbolKind::Interface),
87            "type_alias_declaration" => Some(SymbolKind::TypeAlias),
88            "enum_declaration" => Some(SymbolKind::Enum),
89            "lexical_declaration" => Some(SymbolKind::Const),
90            "expression_statement" => Some(SymbolKind::Const),
91            _ => None,
92        }
93    }
94
95    /// Derive a symbol name from a top-level expression_statement.
96    ///
97    /// Handles common patterns like:
98    /// - `router.get("/path", ...)` → "router.get:/path"
99    /// - `app.use(middleware)` → "app.use"
100    /// - `module.exports = ...` → "module.exports"
101    fn expression_statement_name(node: &Node, source: &[u8]) -> Option<String> {
102        let child = node.child(0)?;
103        match child.kind() {
104            "call_expression" => {
105                let func = child.child_by_field_name("function")?;
106                let func_text = Self::node_text(&func, source).to_string();
107                // For router.get("/path", ...), extract the route path from first arg
108                let args = child.child_by_field_name("arguments")?;
109                let mut cursor = args.walk();
110                for arg_child in args.children(&mut cursor) {
111                    if arg_child.kind() == "string" || arg_child.kind() == "template_string" {
112                        let path = Self::node_text(&arg_child, source)
113                            .trim_matches(|c| c == '"' || c == '\'' || c == '`')
114                            .to_string();
115                        return Some(format!("{func_text}:{path}"));
116                    }
117                }
118                Some(func_text)
119            }
120            "assignment_expression" => {
121                let left = child.child_by_field_name("left")?;
122                Some(Self::node_text(&left, source).to_string())
123            }
124            _ => {
125                // Fallback: use first line trimmed
126                let text = Self::node_text(&child, source);
127                let first_line = text.lines().next()?;
128                let name = first_line.trim();
129                if name.chars().count() > 60 {
130                    let truncated: String = name.chars().take(57).collect();
131                    Some(format!("{truncated}..."))
132                } else {
133                    Some(name.to_string())
134                }
135            }
136        }
137    }
138
139    /// Extract variable names from a lexical_declaration.
140    /// e.g. `const MAX_RETRIES = 3;` yields "MAX_RETRIES".
141    fn extract_variable_names(node: &Node, source: &[u8]) -> Vec<String> {
142        let mut names = Vec::new();
143        let mut cursor = node.walk();
144        for child in node.children(&mut cursor) {
145            if child.kind() == "variable_declarator" {
146                if let Some(name_node) = child.child_by_field_name("name") {
147                    let name = Self::node_text(&name_node, source).to_string();
148                    if !name.is_empty() {
149                        names.push(name);
150                    }
151                }
152            }
153        }
154        names
155    }
156
157    /// Extract a symbol from a declaration node.
158    fn extract_symbol(
159        node: &Node,
160        source: &[u8],
161        file_path: &Path,
162        visibility: Visibility,
163    ) -> Vec<Symbol> {
164        let kind = match Self::map_symbol_kind(node.kind()) {
165            Some(k) => k,
166            None => return vec![],
167        };
168
169        // For expression_statement (e.g. router.get(...)), derive name from the expression
170        if node.kind() == "expression_statement" {
171            let name = match Self::expression_statement_name(node, source) {
172                Some(n) if !n.is_empty() => n,
173                _ => return vec![],
174            };
175            return vec![Symbol {
176                id: Uuid::new_v4(),
177                name: name.clone(),
178                qualified_name: name,
179                kind: SymbolKind::Const,
180                visibility,
181                file_path: file_path.to_path_buf(),
182                span: Span {
183                    start_byte: node.start_byte() as u32,
184                    end_byte: node.end_byte() as u32,
185                },
186                signature: Self::node_signature(node, source),
187                doc_comment: Self::doc_comments(node, source),
188                parent: None,
189                last_modified_by: None,
190                last_modified_intent: None,
191            }];
192        }
193
194        // For lexical_declaration (const/let/var), extract variable names
195        if node.kind() == "lexical_declaration" {
196            let names = Self::extract_variable_names(node, source);
197            return names
198                .into_iter()
199                .map(|name| Symbol {
200                    id: Uuid::new_v4(),
201                    name: name.clone(),
202                    qualified_name: name,
203                    kind: SymbolKind::Const,
204                    visibility: visibility.clone(),
205                    file_path: file_path.to_path_buf(),
206                    span: Span {
207                        start_byte: node.start_byte() as u32,
208                        end_byte: node.end_byte() as u32,
209                    },
210                    signature: Self::node_signature(node, source),
211                    doc_comment: Self::doc_comments(node, source),
212                    parent: None,
213                    last_modified_by: None,
214                    last_modified_intent: None,
215                })
216                .collect();
217        }
218
219        let name = match Self::node_name(node, source) {
220            Some(n) if !n.is_empty() => n,
221            _ => return vec![],
222        };
223
224        vec![Symbol {
225            id: Uuid::new_v4(),
226            name: name.clone(),
227            qualified_name: name,
228            kind,
229            visibility,
230            file_path: file_path.to_path_buf(),
231            span: Span {
232                start_byte: node.start_byte() as u32,
233                end_byte: node.end_byte() as u32,
234            },
235            signature: Self::node_signature(node, source),
236            doc_comment: Self::doc_comments(node, source),
237            parent: None,
238            last_modified_by: None,
239            last_modified_intent: None,
240        }]
241    }
242
243    /// Find the name of the enclosing function for a given node, if any.
244    fn enclosing_function_name(node: &Node, source: &[u8]) -> String {
245        let mut current = node.parent();
246        while let Some(parent) = current {
247            match parent.kind() {
248                "function_declaration" | "method_definition" => {
249                    if let Some(name_node) = parent.child_by_field_name("name") {
250                        let name = Self::node_text(&name_node, source);
251                        if !name.is_empty() {
252                            return name.to_string();
253                        }
254                    }
255                }
256                "arrow_function" | "function_expression" | "function" => {
257                    // Anonymous function — check if it's assigned to a variable
258                    if let Some(gp) = parent.parent() {
259                        if gp.kind() == "variable_declarator" {
260                            if let Some(name_node) = gp.child_by_field_name("name") {
261                                let name = Self::node_text(&name_node, source);
262                                if !name.is_empty() {
263                                    return name.to_string();
264                                }
265                            }
266                        }
267                    }
268                }
269                _ => {}
270            }
271            current = parent.parent();
272        }
273        "<module>".to_string()
274    }
275
276    /// Extract the callee name from a call_expression or new_expression's function node.
277    fn extract_callee_name(node: &Node, source: &[u8]) -> (String, CallKind) {
278        match node.kind() {
279            "member_expression" => {
280                // e.g. console.log, user.save()
281                if let Some(prop) = node.child_by_field_name("property") {
282                    let name = Self::node_text(&prop, source).to_string();
283                    return (name, CallKind::MethodCall);
284                }
285                let text = Self::node_text(node, source).to_string();
286                (text, CallKind::MethodCall)
287            }
288            "identifier" => {
289                let name = Self::node_text(node, source).to_string();
290                (name, CallKind::DirectCall)
291            }
292            _ => {
293                let text = Self::node_text(node, source).to_string();
294                (text, CallKind::DirectCall)
295            }
296        }
297    }
298
299    /// Recursively walk the tree to extract call edges.
300    fn walk_calls(cursor: &mut TreeCursor, source: &[u8], calls: &mut Vec<RawCallEdge>) {
301        let node = cursor.node();
302
303        match node.kind() {
304            "call_expression" => {
305                // Direct or method call: get the function part
306                if let Some(func_node) = node.child_by_field_name("function") {
307                    let (callee, kind) = Self::extract_callee_name(&func_node, source);
308                    if !callee.is_empty() {
309                        let caller = Self::enclosing_function_name(&node, source);
310                        calls.push(RawCallEdge {
311                            caller_name: caller,
312                            callee_name: callee,
313                            call_site: Span {
314                                start_byte: node.start_byte() as u32,
315                                end_byte: node.end_byte() as u32,
316                            },
317                            kind,
318                        });
319                    }
320                }
321            }
322            "new_expression" => {
323                // Constructor call: new ClassName(...)
324                if let Some(constructor_node) = node.child_by_field_name("constructor") {
325                    let name = Self::node_text(&constructor_node, source).to_string();
326                    if !name.is_empty() {
327                        let caller = Self::enclosing_function_name(&node, source);
328                        calls.push(RawCallEdge {
329                            caller_name: caller,
330                            callee_name: name,
331                            call_site: Span {
332                                start_byte: node.start_byte() as u32,
333                                end_byte: node.end_byte() as u32,
334                            },
335                            kind: CallKind::DirectCall,
336                        });
337                    }
338                }
339            }
340            _ => {}
341        }
342
343        // Recurse into children
344        if cursor.goto_first_child() {
345            loop {
346                Self::walk_calls(cursor, source, calls);
347                if !cursor.goto_next_sibling() {
348                    break;
349                }
350            }
351            cursor.goto_parent();
352        }
353    }
354
355    /// Extract the alias name from a namespace_import node (e.g. `* as utils`).
356    fn namespace_import_alias(node: &Node, source: &[u8]) -> Option<String> {
357        if let Some(name_node) = node.child_by_field_name("name") {
358            return Some(Self::node_text(&name_node, source).to_string());
359        }
360        // Fallback: look for identifier child
361        let mut cursor = node.walk();
362        for child in node.children(&mut cursor) {
363            if child.kind() == "identifier" {
364                return Some(Self::node_text(&child, source).to_string());
365            }
366        }
367        None
368    }
369
370    /// Extract imports from an import_statement node.
371    ///
372    /// Handles:
373    /// - `import { A, B } from 'module'`
374    /// - `import * as ns from 'module'`
375    /// - `import Default from 'module'`
376    /// - `import 'module'` (side-effect import)
377    fn extract_import(node: &Node, source: &[u8]) -> Vec<Import> {
378        let mut imports = Vec::new();
379
380        // Get the module path (source field of import_statement)
381        let module_path = match node.child_by_field_name("source") {
382            Some(src_node) => {
383                let raw = Self::node_text(&src_node, source);
384                // Strip quotes from string literal
385                raw.trim_matches(|c| c == '\'' || c == '"').to_string()
386            }
387            None => return imports,
388        };
389
390        let is_external = !module_path.starts_with('.') && !module_path.starts_with('/');
391
392        // Walk children to find imported names
393        let mut cursor = node.walk();
394        let mut found_names = false;
395
396        for child in node.children(&mut cursor) {
397            match child.kind() {
398                "import_clause" => {
399                    Self::extract_import_clause(&child, source, &module_path, is_external, &mut imports);
400                    found_names = true;
401                }
402                "named_imports" => {
403                    Self::extract_named_imports(&child, source, &module_path, is_external, &mut imports);
404                    found_names = true;
405                }
406                "namespace_import" => {
407                    // import * as ns from 'module'
408                    let alias = Self::namespace_import_alias(&child, source);
409                    imports.push(Import {
410                        module_path: module_path.clone(),
411                        imported_name: "*".to_string(),
412                        alias,
413                        is_external,
414                    });
415                    found_names = true;
416                }
417                "identifier" => {
418                    // Default import: import Foo from 'module'
419                    let name = Self::node_text(&child, source).to_string();
420                    if name != "import" && name != "from" && name != "type" {
421                        imports.push(Import {
422                            module_path: module_path.clone(),
423                            imported_name: name,
424                            alias: None,
425                            is_external,
426                        });
427                        found_names = true;
428                    }
429                }
430                _ => {}
431            }
432        }
433
434        // Side-effect import: import 'module'
435        if !found_names {
436            imports.push(Import {
437                module_path,
438                imported_name: "*".to_string(),
439                alias: None,
440                is_external,
441            });
442        }
443
444        imports
445    }
446
447    /// Extract names from an import_clause node.
448    fn extract_import_clause(
449        node: &Node,
450        source: &[u8],
451        module_path: &str,
452        is_external: bool,
453        imports: &mut Vec<Import>,
454    ) {
455        let mut cursor = node.walk();
456        for child in node.children(&mut cursor) {
457            match child.kind() {
458                "identifier" => {
459                    // Default import
460                    let name = Self::node_text(&child, source).to_string();
461                    imports.push(Import {
462                        module_path: module_path.to_string(),
463                        imported_name: name,
464                        alias: None,
465                        is_external,
466                    });
467                }
468                "named_imports" => {
469                    Self::extract_named_imports(&child, source, module_path, is_external, imports);
470                }
471                "namespace_import" => {
472                    let alias = Self::namespace_import_alias(&child, source);
473                    imports.push(Import {
474                        module_path: module_path.to_string(),
475                        imported_name: "*".to_string(),
476                        alias,
477                        is_external,
478                    });
479                }
480                _ => {}
481            }
482        }
483    }
484
485    /// Extract individual names from a named_imports node (`{ A, B as C }`).
486    fn extract_named_imports(
487        node: &Node,
488        source: &[u8],
489        module_path: &str,
490        is_external: bool,
491        imports: &mut Vec<Import>,
492    ) {
493        let mut cursor = node.walk();
494        for child in node.children(&mut cursor) {
495            if child.kind() == "import_specifier" {
496                let name_node = child.child_by_field_name("name");
497                let alias_node = child.child_by_field_name("alias");
498
499                let imported_name = name_node
500                    .map(|n| Self::node_text(&n, source).to_string())
501                    .unwrap_or_default();
502
503                let alias = alias_node.map(|n| Self::node_text(&n, source).to_string());
504
505                if !imported_name.is_empty() {
506                    imports.push(Import {
507                        module_path: module_path.to_string(),
508                        imported_name,
509                        alias,
510                        is_external,
511                    });
512                }
513            }
514        }
515    }
516}
517
518impl Default for TypeScriptParser {
519    fn default() -> Self {
520        Self::new()
521    }
522}
523
524impl LanguageParser for TypeScriptParser {
525    fn extensions(&self) -> &[&str] {
526        &["ts", "tsx", "js", "jsx"]
527    }
528
529    fn extract_symbols(&self, source: &[u8], file_path: &Path) -> Result<Vec<Symbol>> {
530        if source.is_empty() {
531            return Ok(vec![]);
532        }
533
534        let tree = Self::parse_tree(source)?;
535        let root = tree.root_node();
536        let mut symbols = Vec::new();
537        let mut cursor = root.walk();
538
539        for node in root.children(&mut cursor) {
540            match node.kind() {
541                "export_statement" => {
542                    // Exported declaration: unwrap to find the inner declaration.
543                    // Also capture bare export statements (e.g. `export default router;`)
544                    // as symbols so they survive AST merge reconstruction.
545                    let mut inner_cursor = node.walk();
546                    let mut found_inner = false;
547                    for child in node.children(&mut inner_cursor) {
548                        if Self::map_symbol_kind(child.kind()).is_some() {
549                            symbols.extend(Self::extract_symbol(
550                                &child,
551                                source,
552                                file_path,
553                                Visibility::Public,
554                            ));
555                            found_inner = true;
556                        }
557                    }
558                    if !found_inner {
559                        // Bare export (e.g. `export default router;`) — treat the
560                        // entire export_statement as a Const symbol. Extract the
561                        // exported identifier from the tree for a stable name.
562                        let name = node
563                            .child_by_field_name("declaration")
564                            .or_else(|| node.child_by_field_name("value"))
565                            .map(|n| Self::node_text(&n, source).trim().to_string())
566                            .filter(|s| !s.is_empty())
567                            .unwrap_or_else(|| {
568                                let text = Self::node_text(&node, source);
569                                text.lines().next().unwrap_or("export").trim().to_string()
570                            });
571                        symbols.push(Symbol {
572                            id: Uuid::new_v4(),
573                            name: name.clone(),
574                            qualified_name: name,
575                            kind: SymbolKind::Const,
576                            visibility: Visibility::Public,
577                            file_path: file_path.to_path_buf(),
578                            span: Span {
579                                start_byte: node.start_byte() as u32,
580                                end_byte: node.end_byte() as u32,
581                            },
582                            signature: Self::node_signature(&node, source),
583                            doc_comment: Self::doc_comments(&node, source),
584                            parent: None,
585                            last_modified_by: None,
586                            last_modified_intent: None,
587                        });
588                    }
589                }
590                kind if Self::map_symbol_kind(kind).is_some() => {
591                    // Non-exported top-level declaration
592                    symbols.extend(Self::extract_symbol(
593                        &node,
594                        source,
595                        file_path,
596                        Visibility::Private,
597                    ));
598                }
599                _ => {}
600            }
601        }
602
603        // Deduplicate qualified_names to prevent BTreeMap key collisions in
604        // ast_merge (which silently drops earlier entries with the same key).
605        // Common case: multiple `app.use(...)` calls all resolve to "app.use".
606        let mut seen: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
607        for sym in &mut symbols {
608            let count = seen.entry(sym.qualified_name.clone()).or_insert(0);
609            *count += 1;
610            if *count > 1 {
611                sym.qualified_name = format!("{}#{}", sym.qualified_name, count);
612                sym.name = sym.qualified_name.clone();
613            }
614        }
615
616        Ok(symbols)
617    }
618
619    fn extract_calls(&self, source: &[u8], _file_path: &Path) -> Result<Vec<RawCallEdge>> {
620        if source.is_empty() {
621            return Ok(vec![]);
622        }
623
624        let tree = Self::parse_tree(source)?;
625        let root = tree.root_node();
626        let mut calls = Vec::new();
627        let mut cursor = root.walk();
628
629        Self::walk_calls(&mut cursor, source, &mut calls);
630
631        Ok(calls)
632    }
633
634    fn extract_types(&self, _source: &[u8], _file_path: &Path) -> Result<Vec<TypeInfo>> {
635        // Stub: will be enhanced later
636        Ok(vec![])
637    }
638
639    fn extract_imports(&self, source: &[u8], _file_path: &Path) -> Result<Vec<Import>> {
640        if source.is_empty() {
641            return Ok(vec![]);
642        }
643
644        let tree = Self::parse_tree(source)?;
645        let root = tree.root_node();
646        let mut imports = Vec::new();
647        let mut cursor = root.walk();
648
649        for node in root.children(&mut cursor) {
650            if node.kind() == "import_statement" {
651                imports.extend(Self::extract_import(&node, source));
652            }
653        }
654
655        Ok(imports)
656    }
657}