Skip to main content

dk_engine/parser/
typescript_parser.rs

1use super::LanguageParser;
2use dk_core::{CallKind, Import, RawCallEdge, Result, Span, Symbol, SymbolKind, TypeInfo, Visibility};
3use std::path::Path;
4use tree_sitter::{Node, Parser, TreeCursor};
5use uuid::Uuid;
6
/// Tree-sitter backed parser for TypeScript and JavaScript sources.
///
/// Produces symbols, call edges, and imports for `ts`, `tsx`, `js`, and `jsx`
/// files; type extraction is currently a stub that returns nothing.
///
/// Every file is parsed with the TSX grammar, whatever its extension.
// NOTE(review): TSX is not a strict superset of TypeScript — angle-bracket
// type assertions such as `<T>value` are presumably parsed as JSX here.
// Confirm whether plain `.ts` inputs rely on that syntax.
pub struct TypeScriptParser;
14
15impl TypeScriptParser {
16    pub fn new() -> Self {
17        Self
18    }
19
20    /// Create a configured tree-sitter parser for TypeScript (TSX superset).
21    fn create_parser() -> Result<Parser> {
22        let mut parser = Parser::new();
23        parser
24            .set_language(&tree_sitter_typescript::LANGUAGE_TSX.into())
25            .map_err(|e| {
26                dk_core::Error::ParseError(format!("Failed to load TypeScript grammar: {e}"))
27            })?;
28        Ok(parser)
29    }
30
31    /// Parse source bytes into a tree-sitter tree.
32    fn parse_tree(source: &[u8]) -> Result<tree_sitter::Tree> {
33        let mut parser = Self::create_parser()?;
34        parser
35            .parse(source, None)
36            .ok_or_else(|| dk_core::Error::ParseError("tree-sitter parse returned None".into()))
37    }
38
39    /// Get the text of a node as a UTF-8 string.
40    fn node_text<'a>(node: &Node, source: &'a [u8]) -> &'a str {
41        let text = &source[node.start_byte()..node.end_byte()];
42        std::str::from_utf8(text).unwrap_or("")
43    }
44
45    /// Extract the name from a node by looking for the `name` field.
46    fn node_name(node: &Node, source: &[u8]) -> Option<String> {
47        node.child_by_field_name("name")
48            .map(|n| Self::node_text(&n, source).to_string())
49    }
50
51    /// Extract the first line of the node's source text as the signature.
52    fn node_signature(node: &Node, source: &[u8]) -> Option<String> {
53        let text_str = Self::node_text(node, source);
54        let first_line = text_str.lines().next()?;
55        Some(first_line.trim().to_string())
56    }
57
58    /// Collect preceding `//` or `/** */` doc comments for a node.
59    fn doc_comments(node: &Node, source: &[u8]) -> Option<String> {
60        let mut comments = Vec::new();
61        let mut sibling = node.prev_sibling();
62
63        while let Some(prev) = sibling {
64            if prev.kind() == "comment" {
65                let text = Self::node_text(&prev, source).trim().to_string();
66                comments.push(text);
67                sibling = prev.prev_sibling();
68                continue;
69            }
70            break;
71        }
72
73        if comments.is_empty() {
74            None
75        } else {
76            comments.reverse();
77            Some(comments.join("\n"))
78        }
79    }
80
81    /// Map a tree-sitter node kind to our SymbolKind, if applicable.
82    fn map_symbol_kind(kind: &str) -> Option<SymbolKind> {
83        match kind {
84            "function_declaration" => Some(SymbolKind::Function),
85            "class_declaration" => Some(SymbolKind::Class),
86            "interface_declaration" => Some(SymbolKind::Interface),
87            "type_alias_declaration" => Some(SymbolKind::TypeAlias),
88            "enum_declaration" => Some(SymbolKind::Enum),
89            "lexical_declaration" => Some(SymbolKind::Const),
90            _ => None,
91        }
92    }
93
94    /// Extract variable names from a lexical_declaration.
95    /// e.g. `const MAX_RETRIES = 3;` yields "MAX_RETRIES".
96    fn extract_variable_names(node: &Node, source: &[u8]) -> Vec<String> {
97        let mut names = Vec::new();
98        let mut cursor = node.walk();
99        for child in node.children(&mut cursor) {
100            if child.kind() == "variable_declarator" {
101                if let Some(name_node) = child.child_by_field_name("name") {
102                    let name = Self::node_text(&name_node, source).to_string();
103                    if !name.is_empty() {
104                        names.push(name);
105                    }
106                }
107            }
108        }
109        names
110    }
111
112    /// Extract a symbol from a declaration node.
113    fn extract_symbol(
114        node: &Node,
115        source: &[u8],
116        file_path: &Path,
117        visibility: Visibility,
118    ) -> Vec<Symbol> {
119        let kind = match Self::map_symbol_kind(node.kind()) {
120            Some(k) => k,
121            None => return vec![],
122        };
123
124        // For lexical_declaration (const/let/var), extract variable names
125        if node.kind() == "lexical_declaration" {
126            let names = Self::extract_variable_names(node, source);
127            return names
128                .into_iter()
129                .map(|name| Symbol {
130                    id: Uuid::new_v4(),
131                    name: name.clone(),
132                    qualified_name: name,
133                    kind: SymbolKind::Const,
134                    visibility: visibility.clone(),
135                    file_path: file_path.to_path_buf(),
136                    span: Span {
137                        start_byte: node.start_byte() as u32,
138                        end_byte: node.end_byte() as u32,
139                    },
140                    signature: Self::node_signature(node, source),
141                    doc_comment: Self::doc_comments(node, source),
142                    parent: None,
143                    last_modified_by: None,
144                    last_modified_intent: None,
145                })
146                .collect();
147        }
148
149        let name = match Self::node_name(node, source) {
150            Some(n) if !n.is_empty() => n,
151            _ => return vec![],
152        };
153
154        vec![Symbol {
155            id: Uuid::new_v4(),
156            name: name.clone(),
157            qualified_name: name,
158            kind,
159            visibility,
160            file_path: file_path.to_path_buf(),
161            span: Span {
162                start_byte: node.start_byte() as u32,
163                end_byte: node.end_byte() as u32,
164            },
165            signature: Self::node_signature(node, source),
166            doc_comment: Self::doc_comments(node, source),
167            parent: None,
168            last_modified_by: None,
169            last_modified_intent: None,
170        }]
171    }
172
173    /// Find the name of the enclosing function for a given node, if any.
174    fn enclosing_function_name(node: &Node, source: &[u8]) -> String {
175        let mut current = node.parent();
176        while let Some(parent) = current {
177            match parent.kind() {
178                "function_declaration" | "method_definition" => {
179                    if let Some(name_node) = parent.child_by_field_name("name") {
180                        let name = Self::node_text(&name_node, source);
181                        if !name.is_empty() {
182                            return name.to_string();
183                        }
184                    }
185                }
186                "arrow_function" | "function_expression" | "function" => {
187                    // Anonymous function — check if it's assigned to a variable
188                    if let Some(gp) = parent.parent() {
189                        if gp.kind() == "variable_declarator" {
190                            if let Some(name_node) = gp.child_by_field_name("name") {
191                                let name = Self::node_text(&name_node, source);
192                                if !name.is_empty() {
193                                    return name.to_string();
194                                }
195                            }
196                        }
197                    }
198                }
199                _ => {}
200            }
201            current = parent.parent();
202        }
203        "<module>".to_string()
204    }
205
206    /// Extract the callee name from a call_expression or new_expression's function node.
207    fn extract_callee_name(node: &Node, source: &[u8]) -> (String, CallKind) {
208        match node.kind() {
209            "member_expression" => {
210                // e.g. console.log, user.save()
211                if let Some(prop) = node.child_by_field_name("property") {
212                    let name = Self::node_text(&prop, source).to_string();
213                    return (name, CallKind::MethodCall);
214                }
215                let text = Self::node_text(node, source).to_string();
216                (text, CallKind::MethodCall)
217            }
218            "identifier" => {
219                let name = Self::node_text(node, source).to_string();
220                (name, CallKind::DirectCall)
221            }
222            _ => {
223                let text = Self::node_text(node, source).to_string();
224                (text, CallKind::DirectCall)
225            }
226        }
227    }
228
229    /// Recursively walk the tree to extract call edges.
230    fn walk_calls(cursor: &mut TreeCursor, source: &[u8], calls: &mut Vec<RawCallEdge>) {
231        let node = cursor.node();
232
233        match node.kind() {
234            "call_expression" => {
235                // Direct or method call: get the function part
236                if let Some(func_node) = node.child_by_field_name("function") {
237                    let (callee, kind) = Self::extract_callee_name(&func_node, source);
238                    if !callee.is_empty() {
239                        let caller = Self::enclosing_function_name(&node, source);
240                        calls.push(RawCallEdge {
241                            caller_name: caller,
242                            callee_name: callee,
243                            call_site: Span {
244                                start_byte: node.start_byte() as u32,
245                                end_byte: node.end_byte() as u32,
246                            },
247                            kind,
248                        });
249                    }
250                }
251            }
252            "new_expression" => {
253                // Constructor call: new ClassName(...)
254                if let Some(constructor_node) = node.child_by_field_name("constructor") {
255                    let name = Self::node_text(&constructor_node, source).to_string();
256                    if !name.is_empty() {
257                        let caller = Self::enclosing_function_name(&node, source);
258                        calls.push(RawCallEdge {
259                            caller_name: caller,
260                            callee_name: name,
261                            call_site: Span {
262                                start_byte: node.start_byte() as u32,
263                                end_byte: node.end_byte() as u32,
264                            },
265                            kind: CallKind::DirectCall,
266                        });
267                    }
268                }
269            }
270            _ => {}
271        }
272
273        // Recurse into children
274        if cursor.goto_first_child() {
275            loop {
276                Self::walk_calls(cursor, source, calls);
277                if !cursor.goto_next_sibling() {
278                    break;
279                }
280            }
281            cursor.goto_parent();
282        }
283    }
284
285    /// Extract the alias name from a namespace_import node (e.g. `* as utils`).
286    fn namespace_import_alias(node: &Node, source: &[u8]) -> Option<String> {
287        if let Some(name_node) = node.child_by_field_name("name") {
288            return Some(Self::node_text(&name_node, source).to_string());
289        }
290        // Fallback: look for identifier child
291        let mut cursor = node.walk();
292        for child in node.children(&mut cursor) {
293            if child.kind() == "identifier" {
294                return Some(Self::node_text(&child, source).to_string());
295            }
296        }
297        None
298    }
299
300    /// Extract imports from an import_statement node.
301    ///
302    /// Handles:
303    /// - `import { A, B } from 'module'`
304    /// - `import * as ns from 'module'`
305    /// - `import Default from 'module'`
306    /// - `import 'module'` (side-effect import)
307    fn extract_import(node: &Node, source: &[u8]) -> Vec<Import> {
308        let mut imports = Vec::new();
309
310        // Get the module path (source field of import_statement)
311        let module_path = match node.child_by_field_name("source") {
312            Some(src_node) => {
313                let raw = Self::node_text(&src_node, source);
314                // Strip quotes from string literal
315                raw.trim_matches(|c| c == '\'' || c == '"').to_string()
316            }
317            None => return imports,
318        };
319
320        let is_external = !module_path.starts_with('.') && !module_path.starts_with('/');
321
322        // Walk children to find imported names
323        let mut cursor = node.walk();
324        let mut found_names = false;
325
326        for child in node.children(&mut cursor) {
327            match child.kind() {
328                "import_clause" => {
329                    Self::extract_import_clause(&child, source, &module_path, is_external, &mut imports);
330                    found_names = true;
331                }
332                "named_imports" => {
333                    Self::extract_named_imports(&child, source, &module_path, is_external, &mut imports);
334                    found_names = true;
335                }
336                "namespace_import" => {
337                    // import * as ns from 'module'
338                    let alias = Self::namespace_import_alias(&child, source);
339                    imports.push(Import {
340                        module_path: module_path.clone(),
341                        imported_name: "*".to_string(),
342                        alias,
343                        is_external,
344                    });
345                    found_names = true;
346                }
347                "identifier" => {
348                    // Default import: import Foo from 'module'
349                    let name = Self::node_text(&child, source).to_string();
350                    if name != "import" && name != "from" && name != "type" {
351                        imports.push(Import {
352                            module_path: module_path.clone(),
353                            imported_name: name,
354                            alias: None,
355                            is_external,
356                        });
357                        found_names = true;
358                    }
359                }
360                _ => {}
361            }
362        }
363
364        // Side-effect import: import 'module'
365        if !found_names {
366            imports.push(Import {
367                module_path,
368                imported_name: "*".to_string(),
369                alias: None,
370                is_external,
371            });
372        }
373
374        imports
375    }
376
377    /// Extract names from an import_clause node.
378    fn extract_import_clause(
379        node: &Node,
380        source: &[u8],
381        module_path: &str,
382        is_external: bool,
383        imports: &mut Vec<Import>,
384    ) {
385        let mut cursor = node.walk();
386        for child in node.children(&mut cursor) {
387            match child.kind() {
388                "identifier" => {
389                    // Default import
390                    let name = Self::node_text(&child, source).to_string();
391                    imports.push(Import {
392                        module_path: module_path.to_string(),
393                        imported_name: name,
394                        alias: None,
395                        is_external,
396                    });
397                }
398                "named_imports" => {
399                    Self::extract_named_imports(&child, source, module_path, is_external, imports);
400                }
401                "namespace_import" => {
402                    let alias = Self::namespace_import_alias(&child, source);
403                    imports.push(Import {
404                        module_path: module_path.to_string(),
405                        imported_name: "*".to_string(),
406                        alias,
407                        is_external,
408                    });
409                }
410                _ => {}
411            }
412        }
413    }
414
415    /// Extract individual names from a named_imports node (`{ A, B as C }`).
416    fn extract_named_imports(
417        node: &Node,
418        source: &[u8],
419        module_path: &str,
420        is_external: bool,
421        imports: &mut Vec<Import>,
422    ) {
423        let mut cursor = node.walk();
424        for child in node.children(&mut cursor) {
425            if child.kind() == "import_specifier" {
426                let name_node = child.child_by_field_name("name");
427                let alias_node = child.child_by_field_name("alias");
428
429                let imported_name = name_node
430                    .map(|n| Self::node_text(&n, source).to_string())
431                    .unwrap_or_default();
432
433                let alias = alias_node.map(|n| Self::node_text(&n, source).to_string());
434
435                if !imported_name.is_empty() {
436                    imports.push(Import {
437                        module_path: module_path.to_string(),
438                        imported_name,
439                        alias,
440                        is_external,
441                    });
442                }
443            }
444        }
445    }
446}
447
448impl Default for TypeScriptParser {
449    fn default() -> Self {
450        Self::new()
451    }
452}
453
454impl LanguageParser for TypeScriptParser {
455    fn extensions(&self) -> &[&str] {
456        &["ts", "tsx", "js", "jsx"]
457    }
458
459    fn extract_symbols(&self, source: &[u8], file_path: &Path) -> Result<Vec<Symbol>> {
460        if source.is_empty() {
461            return Ok(vec![]);
462        }
463
464        let tree = Self::parse_tree(source)?;
465        let root = tree.root_node();
466        let mut symbols = Vec::new();
467        let mut cursor = root.walk();
468
469        for node in root.children(&mut cursor) {
470            match node.kind() {
471                "export_statement" => {
472                    // Exported declaration: unwrap to find the inner declaration
473                    let mut inner_cursor = node.walk();
474                    for child in node.children(&mut inner_cursor) {
475                        if Self::map_symbol_kind(child.kind()).is_some() {
476                            symbols.extend(Self::extract_symbol(
477                                &child,
478                                source,
479                                file_path,
480                                Visibility::Public,
481                            ));
482                        }
483                    }
484                }
485                kind if Self::map_symbol_kind(kind).is_some() => {
486                    // Non-exported top-level declaration
487                    symbols.extend(Self::extract_symbol(
488                        &node,
489                        source,
490                        file_path,
491                        Visibility::Private,
492                    ));
493                }
494                _ => {}
495            }
496        }
497
498        Ok(symbols)
499    }
500
501    fn extract_calls(&self, source: &[u8], _file_path: &Path) -> Result<Vec<RawCallEdge>> {
502        if source.is_empty() {
503            return Ok(vec![]);
504        }
505
506        let tree = Self::parse_tree(source)?;
507        let root = tree.root_node();
508        let mut calls = Vec::new();
509        let mut cursor = root.walk();
510
511        Self::walk_calls(&mut cursor, source, &mut calls);
512
513        Ok(calls)
514    }
515
516    fn extract_types(&self, _source: &[u8], _file_path: &Path) -> Result<Vec<TypeInfo>> {
517        // Stub: will be enhanced later
518        Ok(vec![])
519    }
520
521    fn extract_imports(&self, source: &[u8], _file_path: &Path) -> Result<Vec<Import>> {
522        if source.is_empty() {
523            return Ok(vec![]);
524        }
525
526        let tree = Self::parse_tree(source)?;
527        let root = tree.root_node();
528        let mut imports = Vec::new();
529        let mut cursor = root.walk();
530
531        for node in root.children(&mut cursor) {
532            if node.kind() == "import_statement" {
533                imports.extend(Self::extract_import(&node, source));
534            }
535        }
536
537        Ok(imports)
538    }
539}