Skip to main content

dk_engine/parser/
engine.rs

1//! Generic query-driven parser engine.
2//!
3//! [`QueryDrivenParser`] uses tree-sitter's Query API to extract symbols,
4//! calls, and imports from any language. Each language supplies a
5//! [`LanguageConfig`](super::lang_config::LanguageConfig) with its grammar
6//! and S-expression queries; the engine compiles and runs them.
7
8use super::lang_config::{CommentStyle, LanguageConfig};
9use super::LanguageParser;
10use dk_core::{
11    CallKind, Error, FileAnalysis, Import, RawCallEdge, Result, Span, Symbol, TypeInfo,
12};
13use std::path::Path;
14use std::sync::Mutex;
15use streaming_iterator::StreamingIterator;
16use tree_sitter::{Node, Parser, Query, QueryCursor, Tree};
17use uuid::Uuid;
18
/// A language-agnostic parser driven by tree-sitter queries.
///
/// One instance handles a single language, configured via [`LanguageConfig`].
pub struct QueryDrivenParser {
    /// Language-specific configuration: grammar, query strings, comment
    /// style, and the hooks used while building symbols and imports.
    config: Box<dyn LanguageConfig>,
    /// Reusable tree-sitter parser with the language already set. Wrapped
    /// in a `Mutex` so that `&self` methods can borrow it mutably.
    parser: Mutex<Parser>,
    /// Compiled symbols query (always present).
    symbols_query: Query,
    /// Compiled calls query; `None` when the config's query string is empty.
    calls_query: Option<Query>,
    /// Compiled imports query; `None` when the config's query string is empty.
    imports_query: Option<Query>,
}
29
30impl QueryDrivenParser {
31    /// Create a new parser from a language configuration.
32    ///
33    /// Compiles the S-expression query strings from `config` into
34    /// [`Query`] objects. Returns [`Error::ParseError`] if compilation fails.
35    pub fn new(config: Box<dyn LanguageConfig>) -> Result<Self> {
36        let lang = config.language();
37
38        let symbols_query = Query::new(&lang, config.symbols_query()).map_err(|e| {
39            Error::ParseError(format!("Failed to compile symbols query: {e}"))
40        })?;
41
42        let calls_query = {
43            let q = config.calls_query();
44            if q.is_empty() {
45                None
46            } else {
47                Some(Query::new(&lang, q).map_err(|e| {
48                    Error::ParseError(format!("Failed to compile calls query: {e}"))
49                })?)
50            }
51        };
52
53        let imports_query = {
54            let q = config.imports_query();
55            if q.is_empty() {
56                None
57            } else {
58                Some(Query::new(&lang, q).map_err(|e| {
59                    Error::ParseError(format!("Failed to compile imports query: {e}"))
60                })?)
61            }
62        };
63
64        let mut parser = Parser::new();
65        parser
66            .set_language(&lang)
67            .map_err(|e| Error::ParseError(format!("Failed to set language: {e}")))?;
68
69        Ok(Self {
70            config,
71            parser: Mutex::new(parser),
72            symbols_query,
73            calls_query,
74            imports_query,
75        })
76    }
77
78    // ── Helpers ──
79
80    /// Parse source bytes into a tree-sitter syntax tree.
81    ///
82    /// Reuses the cached `Parser` instance to avoid repeated allocation
83    /// and language setup.
84    fn parse_tree(&self, source: &[u8]) -> Result<tree_sitter::Tree> {
85        let mut parser = self.parser.lock().unwrap_or_else(|e| e.into_inner());
86        parser
87            .parse(source, None)
88            .ok_or_else(|| Error::ParseError("tree-sitter parse returned None".into()))
89    }
90
91    /// Extract the UTF-8 text of a node from the source bytes.
92    fn node_text<'a>(node: &Node, source: &'a [u8]) -> &'a str {
93        let bytes = &source[node.start_byte()..node.end_byte()];
94        std::str::from_utf8(bytes).unwrap_or("")
95    }
96
97    /// Extract the first line of a node's text as its signature.
98    fn node_signature(node: &Node, source: &[u8]) -> Option<String> {
99        let text = Self::node_text(node, source);
100        let first_line = text.lines().next()?;
101        let trimmed = first_line.trim();
102        if trimmed.is_empty() {
103            None
104        } else {
105            Some(trimmed.to_string())
106        }
107    }
108
109    /// Collect doc-comment lines immediately preceding `node`.
110    ///
111    /// Walks backwards through previous siblings, collecting lines that
112    /// match the configured [`CommentStyle`]. Preserves the original
113    /// comment prefix (e.g. `#`, `///`, `//`, `/** */`) so that AST merge
114    /// can reconstruct valid source code.
115    ///
116    /// **Note:** Unlike the old hand-written parsers (which stripped the
117    /// prefix), `Symbol.doc_comment` now includes the raw prefix. This is
118    /// intentional — AST merge needs the prefix to reconstruct valid
119    /// source. Consumers that display doc comments should strip prefixes
120    /// at the presentation layer.
121    fn collect_doc_comments(&self, node: &Node, source: &[u8]) -> Option<String> {
122        let comment_prefix = match self.config.comment_style() {
123            CommentStyle::TripleSlash => "///",
124            CommentStyle::Hash => "#",
125            CommentStyle::SlashSlash => "//",
126            CommentStyle::DashDash => "--",
127        };
128
129        let mut lines = Vec::new();
130        let mut sibling = node.prev_sibling();
131
132        while let Some(prev) = sibling {
133            if prev.kind() == "line_comment" || prev.kind() == "comment" {
134                // Skip inline comments: if this comment is on the same line
135                // as a preceding non-comment sibling, it belongs to that
136                // sibling (e.g. `x = 60  # 60 seconds`), not to our node.
137                if let Some(before_comment) = prev.prev_sibling() {
138                    if before_comment.kind() != "comment"
139                        && before_comment.kind() != "line_comment"
140                        && before_comment.end_position().row == prev.start_position().row
141                    {
142                        break;
143                    }
144                }
145
146                let text = Self::node_text(&prev, source).trim();
147                if text.starts_with(comment_prefix) || text.starts_with("/*") {
148                    // Preserve the full comment text including prefix.
149                    // The `/*` branch captures JSDoc (`/** ... */`) blocks
150                    // for languages using CommentStyle::SlashSlash.
151                    lines.push(text.to_string());
152                    sibling = prev.prev_sibling();
153                    continue;
154                }
155            }
156            break;
157        }
158
159        if lines.is_empty() {
160            None
161        } else {
162            lines.reverse();
163            Some(lines.join("\n"))
164        }
165    }
166
167    /// Walk parent nodes to find the name of the enclosing function.
168    ///
169    /// Returns `"<module>"` if the node is at the top level.
170    fn enclosing_function_name(&self, node: &Node, source: &[u8]) -> String {
171        let named_function_kinds = [
172            "function_item",
173            "function_definition",
174            "function_declaration",
175            "method_definition",
176        ];
177        // Anonymous function forms whose name comes from an enclosing
178        // variable_declarator (e.g. `const fn = function() {}`)
179        let anonymous_function_kinds = ["arrow_function", "function_expression", "function"];
180
181        let mut current = node.parent();
182        while let Some(parent) = current {
183            let kind = parent.kind();
184            if named_function_kinds.contains(&kind) {
185                if let Some(name_node) = parent.child_by_field_name("name") {
186                    let name = Self::node_text(&name_node, source);
187                    if !name.is_empty() {
188                        return name.to_string();
189                    }
190                }
191            } else if anonymous_function_kinds.contains(&kind) {
192                // Check if assigned to a variable: const foo = function() {}
193                if let Some(gp) = parent.parent() {
194                    if gp.kind() == "variable_declarator" {
195                        if let Some(name_node) = gp.child_by_field_name("name") {
196                            let name = Self::node_text(&name_node, source);
197                            if !name.is_empty() {
198                                return name.to_string();
199                            }
200                        }
201                    }
202                }
203            }
204            current = parent.parent();
205        }
206        "<module>".to_string()
207    }
208}
209
210impl QueryDrivenParser {
211    /// Extract symbols from an already-parsed tree.
212    fn symbols_from_tree(
213        &self,
214        tree: &Tree,
215        source: &[u8],
216        file_path: &Path,
217    ) -> Vec<Symbol> {
218        let root = tree.root_node();
219        let capture_names = self.symbols_query.capture_names();
220
221        let mut cursor = QueryCursor::new();
222        let mut symbols = Vec::new();
223        let mut matches = cursor.matches(&self.symbols_query, root, source);
224
225        while let Some(m) = { matches.advance(); matches.get() } {
226            let mut name_text: Option<String> = None;
227            let mut definition_node: Option<Node> = None;
228            let mut kind_suffix: Option<String> = None;
229            let mut modifiers_text: Option<String> = None;
230
231            for capture in m.captures {
232                let capture_name = capture_names[capture.index as usize];
233
234                if capture_name == "name" {
235                    name_text = Some(Self::node_text(&capture.node, source).to_string());
236                } else if let Some(suffix) = capture_name.strip_prefix("definition.") {
237                    definition_node = Some(capture.node);
238                    kind_suffix = Some(suffix.to_string());
239                } else if capture_name == "modifiers" {
240                    modifiers_text = Some(Self::node_text(&capture.node, source).to_string());
241                }
242            }
243
244            // We need at least a name and a definition node with a kind suffix.
245            let name = match &name_text {
246                Some(n) if !n.is_empty() => n.as_str(),
247                _ => continue,
248            };
249            let def_node = match definition_node {
250                Some(n) => n,
251                None => continue,
252            };
253            let suffix = match &kind_suffix {
254                Some(s) => s.as_str(),
255                None => continue,
256            };
257
258            let symbol_kind = match self.config.map_capture_to_kind(suffix) {
259                Some(k) => k,
260                None => continue,
261            };
262
263            let visibility = self
264                .config
265                .resolve_visibility(modifiers_text.as_deref(), name);
266            let signature = Self::node_signature(&def_node, source);
267            let doc_comment = self.collect_doc_comments(&def_node, source);
268
269            let mut sym = Symbol {
270                id: Uuid::new_v4(),
271                name: name.to_string(),
272                qualified_name: name.to_string(),
273                kind: symbol_kind,
274                visibility,
275                file_path: file_path.to_path_buf(),
276                span: Span {
277                    start_byte: def_node.start_byte() as u32,
278                    end_byte: def_node.end_byte() as u32,
279                },
280                signature,
281                doc_comment,
282                parent: None,
283                last_modified_by: None,
284                last_modified_intent: None,
285            };
286
287            self.config.adjust_symbol(&mut sym, &def_node, source);
288            symbols.push(sym);
289        }
290
291        symbols
292    }
293
294    /// Extract call edges from an already-parsed tree.
295    fn calls_from_tree(&self, tree: &Tree, source: &[u8]) -> Vec<RawCallEdge> {
296        let calls_query = match &self.calls_query {
297            Some(q) => q,
298            None => return vec![],
299        };
300
301        let root = tree.root_node();
302        let capture_names = calls_query.capture_names();
303
304        let mut cursor = QueryCursor::new();
305        let mut calls = Vec::new();
306        let mut matches = cursor.matches(calls_query, root, source);
307
308        while let Some(m) = { matches.advance(); matches.get() } {
309            let mut callee_text: Option<String> = None;
310            let mut method_callee_text: Option<String> = None;
311            let mut call_node: Option<Node> = None;
312            let mut first_node: Option<Node> = None;
313
314            for capture in m.captures {
315                let capture_name = capture_names[capture.index as usize];
316
317                if first_node.is_none() {
318                    first_node = Some(capture.node);
319                }
320
321                match capture_name {
322                    "callee" => {
323                        callee_text =
324                            Some(Self::node_text(&capture.node, source).to_string());
325                    }
326                    "method_callee" => {
327                        method_callee_text =
328                            Some(Self::node_text(&capture.node, source).to_string());
329                    }
330                    "call" => call_node = Some(capture.node),
331                    _ => {}
332                }
333            }
334
335            // Determine call kind and callee name.
336            let (callee_name, call_kind) = if let Some(method) =
337                method_callee_text.filter(|s| !s.is_empty())
338            {
339                (method, CallKind::MethodCall)
340            } else if let Some(direct) = callee_text.filter(|s| !s.is_empty()) {
341                (direct, CallKind::DirectCall)
342            } else {
343                continue;
344            };
345
346            // Use the @call node for span, falling back to the first captured node.
347            let span_node = call_node
348                .or(first_node)
349                .expect("match has at least one capture");
350
351            let caller_name = self.enclosing_function_name(&span_node, source);
352
353            calls.push(RawCallEdge {
354                caller_name,
355                callee_name,
356                call_site: Span {
357                    start_byte: span_node.start_byte() as u32,
358                    end_byte: span_node.end_byte() as u32,
359                },
360                kind: call_kind,
361            });
362        }
363
364        calls
365    }
366
367    /// Extract imports from an already-parsed tree.
368    fn imports_from_tree(&self, tree: &Tree, source: &[u8]) -> Vec<Import> {
369        let imports_query = match &self.imports_query {
370            Some(q) => q,
371            None => return vec![],
372        };
373
374        let root = tree.root_node();
375        let capture_names = imports_query.capture_names();
376
377        let mut cursor = QueryCursor::new();
378        let mut imports = Vec::new();
379        let mut matches = cursor.matches(imports_query, root, source);
380
381        while let Some(m) = { matches.advance(); matches.get() } {
382            let mut module_text: Option<String> = None;
383            let mut import_name_text: Option<String> = None;
384            let mut alias_text: Option<String> = None;
385            let mut is_relative_import = false;
386
387            for capture in m.captures {
388                let capture_name = capture_names[capture.index as usize];
389
390                match capture_name {
391                    "module" => {
392                        let text = Self::node_text(&capture.node, source);
393                        // Strip surrounding quotes if present.
394                        module_text = Some(
395                            text.trim_matches(|c| c == '"' || c == '\'').to_string(),
396                        );
397                    }
398                    "import_name" => {
399                        import_name_text =
400                            Some(Self::node_text(&capture.node, source).to_string());
401                    }
402                    "alias" => {
403                        alias_text = Some(Self::node_text(&capture.node, source).to_string());
404                    }
405                    "_relative" => {
406                        // Marker capture: the import query flagged this as a
407                        // relative/internal import (e.g. Ruby require_relative).
408                        is_relative_import = true;
409                    }
410                    _ => {}
411                }
412            }
413
414            let module_path = match module_text {
415                Some(ref m) if !m.is_empty() => m.clone(),
416                _ => continue,
417            };
418
419            let imported_name = import_name_text
420                .filter(|s| !s.is_empty())
421                .unwrap_or_else(|| {
422                    // Derive imported name from the last segment of the module path.
423                    module_path
424                        .rsplit(['/', '.', ':'])
425                        .next()
426                        .unwrap_or(&module_path)
427                        .to_string()
428                });
429
430            let alias = alias_text.filter(|s| !s.is_empty());
431
432            // If the query flagged this as a relative import (e.g. Ruby
433            // require_relative), it is always internal regardless of path.
434            let is_external = if is_relative_import {
435                false
436            } else {
437                self.config.is_external_import(&module_path)
438            };
439
440            imports.push(Import {
441                module_path,
442                imported_name,
443                alias,
444                is_external,
445            });
446        }
447
448        imports
449    }
450}
451
452impl LanguageParser for QueryDrivenParser {
453    fn extensions(&self) -> &[&str] {
454        self.config.extensions()
455    }
456
457    fn extract_symbols(&self, source: &[u8], file_path: &Path) -> Result<Vec<Symbol>> {
458        if source.is_empty() {
459            return Ok(vec![]);
460        }
461        let tree = self.parse_tree(source)?;
462        Ok(self.symbols_from_tree(&tree, source, file_path))
463    }
464
465    fn extract_calls(&self, source: &[u8], _file_path: &Path) -> Result<Vec<RawCallEdge>> {
466        if source.is_empty() {
467            return Ok(vec![]);
468        }
469        let tree = self.parse_tree(source)?;
470        Ok(self.calls_from_tree(&tree, source))
471    }
472
473    fn extract_types(&self, _source: &[u8], _file_path: &Path) -> Result<Vec<TypeInfo>> {
474        Ok(vec![])
475    }
476
477    fn extract_imports(&self, source: &[u8], _file_path: &Path) -> Result<Vec<Import>> {
478        if source.is_empty() {
479            return Ok(vec![]);
480        }
481        let tree = self.parse_tree(source)?;
482        Ok(self.imports_from_tree(&tree, source))
483    }
484
485    /// Parse once and extract all data — avoids triple-parsing the same source.
486    fn parse_file(&self, source: &[u8], file_path: &Path) -> Result<FileAnalysis> {
487        if source.is_empty() {
488            return Ok(FileAnalysis {
489                symbols: vec![],
490                calls: vec![],
491                types: vec![],
492                imports: vec![],
493            });
494        }
495        let tree = self.parse_tree(source)?;
496        Ok(FileAnalysis {
497            symbols: self.symbols_from_tree(&tree, source, file_path),
498            calls: self.calls_from_tree(&tree, source),
499            types: vec![],
500            imports: self.imports_from_tree(&tree, source),
501        })
502    }
503}