Skip to main content

dk_engine/parser/langs/
typescript.rs

1//! TypeScript/JavaScript language configuration for the query-driven parser.
2
3use crate::parser::engine::QueryDrivenParser;
4use crate::parser::lang_config::{CommentStyle, LanguageConfig};
5use crate::parser::LanguageParser;
6use dk_core::{FileAnalysis, Import, RawCallEdge, Result, Symbol, SymbolKind, TypeInfo, Visibility};
7use std::collections::HashMap;
8use std::path::Path;
9use tree_sitter::Language;
10
/// TypeScript language configuration for [`QueryDrivenParser`].
///
/// A stateless marker type: every `.ts`, `.tsx`, `.js`, and `.jsx` file is
/// parsed with the TSX grammar so that a single configuration covers all four
/// extensions.
///
/// NOTE(review): TSX is not a strict superset of TypeScript — angle-bracket
/// type assertions such as `<T>value` are TypeScript-only syntax. Confirm that
/// `.ts` sources relying on them parse acceptably under the TSX grammar.
#[derive(Debug, Clone, Copy, Default)]
pub struct TypeScriptConfig;
16
17impl LanguageConfig for TypeScriptConfig {
18    fn language(&self) -> Language {
19        tree_sitter_typescript::LANGUAGE_TSX.into()
20    }
21
22    fn extensions(&self) -> &'static [&'static str] {
23        &["ts", "tsx", "js", "jsx"]
24    }
25
26    fn symbols_query(&self) -> &'static str {
27        include_str!("../queries/typescript_symbols.scm")
28    }
29
30    fn calls_query(&self) -> &'static str {
31        include_str!("../queries/typescript_calls.scm")
32    }
33
34    fn imports_query(&self) -> &'static str {
35        include_str!("../queries/typescript_imports.scm")
36    }
37
38    fn comment_style(&self) -> CommentStyle {
39        CommentStyle::SlashSlash
40    }
41
42    fn resolve_visibility(&self, modifiers: Option<&str>, _name: &str) -> Visibility {
43        // If @modifiers captured text (meaning the declaration was inside an
44        // export_statement), the symbol is Public. Otherwise Private.
45        match modifiers {
46            Some(_) => Visibility::Public,
47            None => Visibility::Private,
48        }
49    }
50
51    fn adjust_symbol(&self, sym: &mut Symbol, node: &tree_sitter::Node, source: &[u8]) {
52        // For expression_statement nodes (captured as @definition.expression),
53        // derive a meaningful name from the call structure.
54        // e.g. `router.get("/health", ...)` → "router.get:/health"
55        //       `app.use(middleware)` → "app.use"
56        //       `module.exports = ...` → "module.exports"
57        //       `export default router` → "export default router"
58        if sym.kind == SymbolKind::Const && node.kind() == "call_expression" {
59            // The @definition.expression captures the call_expression inside
60            // an expression_statement. Walk up to get the expression_statement
61            // span and doc comments.
62            if let Some(parent) = node.parent() {
63                if parent.kind() == "expression_statement" {
64                    sym.span = dk_core::Span {
65                        start_byte: parent.start_byte() as u32,
66                        end_byte: parent.end_byte() as u32,
67                    };
68                    // Collect doc comments from the expression_statement's
69                    // preceding siblings (the engine only looked at the
70                    // call_expression's siblings, which don't include comments).
71                    if sym.doc_comment.is_none() {
72                        sym.doc_comment = Self::collect_preceding_comments(&parent, source);
73                    }
74                }
75            }
76
77            // Derive a name from the call: func_text + optional first string arg
78            if let Some(func_node) = node.child_by_field_name("function") {
79                let func_text = std::str::from_utf8(
80                    &source[func_node.start_byte()..func_node.end_byte()],
81                )
82                .unwrap_or("")
83                .to_string();
84
85                // Look for the first string argument to append as a path
86                let name = if let Some(args) = node.child_by_field_name("arguments") {
87                    let mut path_name = None;
88                    let mut cursor = args.walk();
89                    for arg_child in args.children(&mut cursor) {
90                        if arg_child.kind() == "string" || arg_child.kind() == "template_string" {
91                            let raw = std::str::from_utf8(
92                                &source[arg_child.start_byte()..arg_child.end_byte()],
93                            )
94                            .unwrap_or("");
95                            let path = raw
96                                .trim_matches(|c| c == '"' || c == '\'' || c == '`')
97                                .to_string();
98                            path_name = Some(format!("{func_text}:{path}"));
99                            break;
100                        }
101                    }
102                    path_name.unwrap_or(func_text)
103                } else {
104                    func_text
105                };
106
107                sym.name = name.clone();
108                sym.qualified_name = name;
109            }
110        } else if sym.kind == SymbolKind::Const && node.kind() == "assignment_expression" {
111            // Assignment: use the left-hand side as the name
112            if let Some(parent) = node.parent() {
113                if parent.kind() == "expression_statement" {
114                    sym.span = dk_core::Span {
115                        start_byte: parent.start_byte() as u32,
116                        end_byte: parent.end_byte() as u32,
117                    };
118                    if sym.doc_comment.is_none() {
119                        sym.doc_comment = Self::collect_preceding_comments(&parent, source);
120                    }
121                }
122            }
123        } else if node.kind() == "export_statement" {
124            // `export default <expr>` — prefix the name
125            let name = format!("export default {}", sym.name);
126            sym.name = name.clone();
127            sym.qualified_name = name;
128        }
129    }
130
131    fn is_external_import(&self, module_path: &str) -> bool {
132        !module_path.starts_with('.') && !module_path.starts_with('/')
133    }
134}
135
136impl TypeScriptConfig {
137    /// Collect `//` and `/** */` comment lines immediately preceding a node.
138    ///
139    /// Preserves the full comment text (including prefix) so that AST
140    /// merge can reconstruct valid TypeScript.
141    fn collect_preceding_comments(
142        node: &tree_sitter::Node,
143        source: &[u8],
144    ) -> Option<String> {
145        let mut lines = Vec::new();
146        let mut sibling = node.prev_sibling();
147
148        while let Some(prev) = sibling {
149            if prev.kind() == "comment" {
150                let text = std::str::from_utf8(&source[prev.start_byte()..prev.end_byte()])
151                    .unwrap_or("")
152                    .trim()
153                    .to_string();
154                if text.starts_with("//") || text.starts_with("/*") {
155                    lines.push(text);
156                    sibling = prev.prev_sibling();
157                    continue;
158                }
159            }
160            break;
161        }
162
163        if lines.is_empty() {
164            None
165        } else {
166            lines.reverse();
167            Some(lines.join("\n"))
168        }
169    }
170}
171
172/// TypeScript parser wrapper that adds qualified-name deduplication.
173///
174/// Multiple top-level expressions can produce the same `qualified_name`
175/// (e.g. several `app.use(...)` calls). This wrapper calls the generic
176/// [`QueryDrivenParser`] and then appends `#N` suffixes to duplicates so
177/// every symbol has a unique key for the AST merge BTreeMap.
178pub struct TypeScriptParser {
179    inner: QueryDrivenParser,
180}
181
182impl TypeScriptParser {
183    /// Create a new TypeScript query-driven parser.
184    pub fn new() -> Result<Self> {
185        Ok(Self {
186            inner: QueryDrivenParser::new(Box::new(TypeScriptConfig))?,
187        })
188    }
189}
190
191impl Default for TypeScriptParser {
192    fn default() -> Self {
193        Self::new().expect("TypeScript parser initialization should not fail")
194    }
195}
196
197impl TypeScriptParser {
198    /// Filter nested symbols and deduplicate qualified names.
199    fn dedup_symbols(mut symbols: Vec<Symbol>) -> Vec<Symbol> {
200        // Filter out nested symbols: if one symbol's span is entirely
201        // inside another's, remove the inner one. This prevents extracting
202        // `res.json(...)` or `const note = ...` from inside arrow functions.
203        let ranges: Vec<(u32, u32)> = symbols
204            .iter()
205            .map(|s| (s.span.start_byte, s.span.end_byte))
206            .collect();
207        symbols.retain(|sym| {
208            let start = sym.span.start_byte;
209            let end = sym.span.end_byte;
210            !ranges.iter().any(|(rs, re)| *rs < start && end < *re)
211        });
212
213        // Deduplicate qualified_names: append #N for duplicates.
214        let mut seen: HashMap<String, usize> = HashMap::new();
215        for sym in &mut symbols {
216            let count = seen.entry(sym.qualified_name.clone()).or_insert(0);
217            *count += 1;
218            if *count > 1 {
219                sym.qualified_name = format!("{}#{}", sym.qualified_name, count);
220                sym.name = sym.qualified_name.clone();
221            }
222        }
223
224        symbols
225    }
226}
227
228impl LanguageParser for TypeScriptParser {
229    fn extensions(&self) -> &[&str] {
230        self.inner.extensions()
231    }
232
233    fn extract_symbols(&self, source: &[u8], file_path: &Path) -> Result<Vec<Symbol>> {
234        let symbols = self.inner.extract_symbols(source, file_path)?;
235        Ok(Self::dedup_symbols(symbols))
236    }
237
238    fn extract_calls(&self, source: &[u8], file_path: &Path) -> Result<Vec<RawCallEdge>> {
239        self.inner.extract_calls(source, file_path)
240    }
241
242    fn extract_types(&self, source: &[u8], file_path: &Path) -> Result<Vec<TypeInfo>> {
243        self.inner.extract_types(source, file_path)
244    }
245
246    fn extract_imports(&self, source: &[u8], file_path: &Path) -> Result<Vec<Import>> {
247        self.inner.extract_imports(source, file_path)
248    }
249
250    fn parse_file(&self, source: &[u8], file_path: &Path) -> Result<FileAnalysis> {
251        let mut analysis = self.inner.parse_file(source, file_path)?;
252        analysis.symbols = Self::dedup_symbols(analysis.symbols);
253        Ok(analysis)
254    }
255}