Skip to main content

mimir_graph/
languages.rs

1//! Per-language tree-sitter adapters: which AST nodes are definitions,
2//! scopes, calls, and imports — and how to read docs/signatures off them.
3
4use tree_sitter::Node;
5
6use crate::extract::ImportRef;
7
/// Source languages this module has a tree-sitter adapter for.
///
/// `TypeScript` and `Tsx` are separate variants because they are parsed
/// with different grammars, even though both report the same display name.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Lang {
    /// Rust sources (`.rs`).
    Rust,
    /// TypeScript sources (`.ts`, `.mts`, `.cts`).
    TypeScript,
    /// TSX grammar; also selected for `.jsx`, `.js`, `.mjs` and `.cjs` files.
    Tsx,
    /// Python sources and stubs (`.py`, `.pyi`).
    Python,
    /// Go sources (`.go`).
    Go,
}
16
17impl Lang {
18    pub fn from_path(path: &str) -> Option<Lang> {
19        let ext = path.rsplit('.').next()?;
20        Some(match ext {
21            "rs" => Lang::Rust,
22            "ts" | "mts" | "cts" => Lang::TypeScript,
23            "tsx" | "jsx" | "js" | "mjs" | "cjs" => Lang::Tsx,
24            "py" | "pyi" => Lang::Python,
25            "go" => Lang::Go,
26            _ => return None,
27        })
28    }
29
30    pub fn name(&self) -> &'static str {
31        match self {
32            Lang::Rust => "rust",
33            Lang::TypeScript | Lang::Tsx => "typescript",
34            Lang::Python => "python",
35            Lang::Go => "go",
36        }
37    }
38
39    pub fn language(&self) -> tree_sitter::Language {
40        match self {
41            Lang::Rust => tree_sitter_rust::LANGUAGE.into(),
42            Lang::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
43            Lang::Tsx => tree_sitter_typescript::LANGUAGE_TSX.into(),
44            Lang::Python => tree_sitter_python::LANGUAGE.into(),
45            Lang::Go => tree_sitter_go::LANGUAGE.into(),
46        }
47    }
48
49    pub fn separator(&self) -> String {
50        "::".into()
51    }
52
53    /// Is this node a symbol definition? Returns (name, kind). The name may
54    /// be pre-qualified with `::` (Go methods carry their receiver).
55    pub fn definition(&self, node: Node, src: &str) -> Option<(String, &'static str)> {
56        let text = |n: Node| src[n.byte_range()].to_string();
57        match self {
58            Lang::Rust => match node.kind() {
59                "function_item" => Some((text(node.child_by_field_name("name")?), "function")),
60                "struct_item" => Some((text(node.child_by_field_name("name")?), "struct")),
61                "enum_item" => Some((text(node.child_by_field_name("name")?), "enum")),
62                "trait_item" => Some((text(node.child_by_field_name("name")?), "trait")),
63                "union_item" => Some((text(node.child_by_field_name("name")?), "struct")),
64                _ => None,
65            },
66            Lang::TypeScript | Lang::Tsx => match node.kind() {
67                "function_declaration" | "generator_function_declaration" => {
68                    Some((text(node.child_by_field_name("name")?), "function"))
69                }
70                "class_declaration" => Some((text(node.child_by_field_name("name")?), "class")),
71                "method_definition" => {
72                    let name = text(node.child_by_field_name("name")?);
73                    if name == "constructor" {
74                        return None;
75                    }
76                    Some((name, "method"))
77                }
78                "interface_declaration" => {
79                    Some((text(node.child_by_field_name("name")?), "interface"))
80                }
81                "enum_declaration" => Some((text(node.child_by_field_name("name")?), "enum")),
82                "type_alias_declaration" => Some((text(node.child_by_field_name("name")?), "type")),
83                // const f = (..) => ..  /  const f = function(..) {..}
84                "variable_declarator" => {
85                    let value = node.child_by_field_name("value")?;
86                    if matches!(value.kind(), "arrow_function" | "function_expression") {
87                        let name = node.child_by_field_name("name")?;
88                        if name.kind() == "identifier" {
89                            return Some((text(name), "function"));
90                        }
91                    }
92                    None
93                }
94                _ => None,
95            },
96            Lang::Python => match node.kind() {
97                "function_definition" => {
98                    Some((text(node.child_by_field_name("name")?), "function"))
99                }
100                "class_definition" => Some((text(node.child_by_field_name("name")?), "class")),
101                _ => None,
102            },
103            Lang::Go => match node.kind() {
104                "function_declaration" => {
105                    Some((text(node.child_by_field_name("name")?), "function"))
106                }
107                "method_declaration" => {
108                    let name = text(node.child_by_field_name("name")?);
109                    let recv = node
110                        .child_by_field_name("receiver")
111                        .and_then(|r| receiver_type(r, src));
112                    Some((
113                        match recv {
114                            Some(t) => format!("{t}::{name}"),
115                            None => name,
116                        },
117                        "method",
118                    ))
119                }
120                "type_spec" => {
121                    let name = text(node.child_by_field_name("name")?);
122                    let kind = match node.child_by_field_name("type").map(|t| t.kind()) {
123                        Some("struct_type") => "struct",
124                        Some("interface_type") => "interface",
125                        _ => "type",
126                    };
127                    Some((name, kind))
128                }
129                _ => None,
130            },
131        }
132    }
133
134    /// Containers that qualify children without being symbols themselves.
135    pub fn scope_only(&self, node: Node, src: &str) -> Option<String> {
136        match self {
137            Lang::Rust => match node.kind() {
138                // impl Foo { .. } / impl Trait for Foo { .. } → scope "Foo"
139                "impl_item" => {
140                    let ty = node.child_by_field_name("type")?;
141                    Some(base_type_name(ty, src))
142                }
143                "mod_item" => Some(src[node.child_by_field_name("name")?.byte_range()].to_string()),
144                _ => None,
145            },
146            _ => None,
147        }
148    }
149
150    /// Field holding the body (cut point for signatures), per node kind.
151    pub fn body_field(&self) -> Option<&'static str> {
152        // All supported definition kinds use "body" except TS declarators,
153        // which signature_text handles via the generic fallback.
154        Some("body")
155    }
156
157    /// If this node is a call, return the bare callee name.
158    pub fn call(&self, node: Node, src: &str) -> Option<String> {
159        let text = |n: Node| src[n.byte_range()].to_string();
160        match self {
161            Lang::Rust => {
162                if node.kind() != "call_expression" {
163                    return None;
164                }
165                let f = node.child_by_field_name("function")?;
166                match f.kind() {
167                    "identifier" => Some(text(f)),
168                    "field_expression" => f.child_by_field_name("field").map(text),
169                    "scoped_identifier" => f.child_by_field_name("name").map(text),
170                    "generic_function" => {
171                        let inner = f.child_by_field_name("function")?;
172                        match inner.kind() {
173                            "identifier" => Some(text(inner)),
174                            "scoped_identifier" => inner.child_by_field_name("name").map(text),
175                            _ => None,
176                        }
177                    }
178                    _ => None,
179                }
180            }
181            Lang::TypeScript | Lang::Tsx => {
182                if node.kind() != "call_expression" {
183                    return None;
184                }
185                let f = node.child_by_field_name("function")?;
186                match f.kind() {
187                    "identifier" => Some(text(f)),
188                    "member_expression" => f.child_by_field_name("property").map(text),
189                    _ => None,
190                }
191            }
192            Lang::Python => {
193                if node.kind() != "call" {
194                    return None;
195                }
196                let f = node.child_by_field_name("function")?;
197                match f.kind() {
198                    "identifier" => Some(text(f)),
199                    "attribute" => f.child_by_field_name("attribute").map(text),
200                    _ => None,
201                }
202            }
203            Lang::Go => {
204                if node.kind() != "call_expression" {
205                    return None;
206                }
207                let f = node.child_by_field_name("function")?;
208                match f.kind() {
209                    "identifier" => Some(text(f)),
210                    "selector_expression" => f.child_by_field_name("field").map(text),
211                    _ => None,
212                }
213            }
214        }
215    }
216
217    /// Collect imports declared by this node.
218    pub fn imports(&self, node: Node, src: &str, out: &mut Vec<ImportRef>) {
219        let text = |n: Node| src[n.byte_range()].to_string();
220        match self {
221            Lang::Rust => {
222                if node.kind() == "use_declaration" {
223                    if let Some(arg) = node.child_by_field_name("argument") {
224                        rust_use_tree(arg, src, "", out);
225                    }
226                }
227            }
228            Lang::TypeScript | Lang::Tsx => {
229                if node.kind() != "import_statement" {
230                    return;
231                }
232                let Some(source) = node
233                    .child_by_field_name("source")
234                    .map(|s| text(s).trim_matches(['"', '\'']).to_string())
235                else {
236                    return;
237                };
238                let mut cursor = node.walk();
239                for child in node.children(&mut cursor) {
240                    if child.kind() != "import_clause" {
241                        continue;
242                    }
243                    let mut c2 = child.walk();
244                    for part in child.children(&mut c2) {
245                        match part.kind() {
246                            "identifier" => out.push(ImportRef {
247                                local: text(part),
248                                source: source.clone(),
249                            }),
250                            "named_imports" => {
251                                let mut c3 = part.walk();
252                                for spec in part.children(&mut c3) {
253                                    if spec.kind() != "import_specifier" {
254                                        continue;
255                                    }
256                                    let local = spec
257                                        .child_by_field_name("alias")
258                                        .or_else(|| spec.child_by_field_name("name"))
259                                        .map(text);
260                                    if let Some(local) = local {
261                                        out.push(ImportRef {
262                                            local,
263                                            source: source.clone(),
264                                        });
265                                    }
266                                }
267                            }
268                            "namespace_import" => {
269                                // import * as ns from "x"
270                                let mut c3 = part.walk();
271                                for id in part.children(&mut c3) {
272                                    if id.kind() == "identifier" {
273                                        out.push(ImportRef {
274                                            local: text(id),
275                                            source: source.clone(),
276                                        });
277                                    }
278                                }
279                            }
280                            _ => {}
281                        }
282                    }
283                }
284            }
285            Lang::Python => match node.kind() {
286                "import_statement" => {
287                    let mut cursor = node.walk();
288                    for child in node.children(&mut cursor) {
289                        match child.kind() {
290                            "dotted_name" => out.push(ImportRef {
291                                local: text(child)
292                                    .rsplit('.')
293                                    .next()
294                                    .unwrap_or_default()
295                                    .to_string(),
296                                source: text(child),
297                            }),
298                            "aliased_import" => {
299                                let name = child.child_by_field_name("name").map(text);
300                                let alias = child.child_by_field_name("alias").map(text);
301                                if let (Some(name), Some(alias)) = (name, alias) {
302                                    out.push(ImportRef {
303                                        local: alias,
304                                        source: name,
305                                    });
306                                }
307                            }
308                            _ => {}
309                        }
310                    }
311                }
312                "import_from_statement" => {
313                    let Some(module) = node.child_by_field_name("module_name").map(text) else {
314                        return;
315                    };
316                    let mut cursor = node.walk();
317                    let mut past_import = false;
318                    for child in node.children(&mut cursor) {
319                        if child.kind() == "import" {
320                            past_import = true;
321                            continue;
322                        }
323                        if !past_import {
324                            continue;
325                        }
326                        match child.kind() {
327                            "dotted_name" => out.push(ImportRef {
328                                local: text(child),
329                                source: module.clone(),
330                            }),
331                            "aliased_import" => {
332                                if let Some(alias) = child.child_by_field_name("alias").map(text) {
333                                    out.push(ImportRef {
334                                        local: alias,
335                                        source: module.clone(),
336                                    });
337                                }
338                            }
339                            _ => {}
340                        }
341                    }
342                }
343                _ => {}
344            },
345            Lang::Go => {
346                if node.kind() != "import_spec" {
347                    return;
348                }
349                let Some(path) = node
350                    .child_by_field_name("path")
351                    .map(|p| text(p).trim_matches('"').to_string())
352                else {
353                    return;
354                };
355                let local = node
356                    .child_by_field_name("name")
357                    .map(text)
358                    .unwrap_or_else(|| path.rsplit('/').next().unwrap_or(&path).to_string());
359                out.push(ImportRef {
360                    local,
361                    source: path,
362                });
363            }
364        }
365    }
366
367    /// Doc comment attached to a definition node.
368    pub fn doc_comment(&self, node: Node, src: &str) -> Option<String> {
369        match self {
370            Lang::Python => {
371                // Docstring: first statement of the body is a string literal.
372                let body = node.child_by_field_name("body")?;
373                let first = body.named_child(0)?;
374                if first.kind() != "expression_statement" {
375                    return None;
376                }
377                let s = first.named_child(0)?;
378                if s.kind() != "string" {
379                    return None;
380                }
381                let raw = &src[s.byte_range()];
382                let cleaned = raw
383                    .trim_start_matches(['r', 'b', 'f', 'u', 'R', 'B', 'F', 'U'])
384                    .trim_matches(['"', '\''])
385                    .trim();
386                Some(cleaned.lines().next().unwrap_or("").trim().to_string())
387                    .filter(|s| !s.is_empty())
388            }
389            Lang::Rust | Lang::Go | Lang::TypeScript | Lang::Tsx => {
390                // Contiguous comment siblings directly above the node
391                // (a blank line breaks the chain; `//!` belongs to the
392                // module, not this item).
393                let mut lines: Vec<String> = Vec::new();
394                let mut expect_row = node.start_position().row;
395                let mut prev = node.prev_sibling();
396                while let Some(p) = prev {
397                    if !p.kind().contains("comment")
398                        || expect_row.saturating_sub(p.end_position().row) > 1
399                        || src[p.byte_range()].starts_with("//!")
400                    {
401                        break;
402                    }
403                    lines.push(src[p.byte_range()].to_string());
404                    expect_row = p.start_position().row;
405                    prev = p.prev_sibling();
406                }
407                if lines.is_empty() {
408                    return None;
409                }
410                lines.reverse();
411                let cleaned: Vec<String> = lines
412                    .iter()
413                    .flat_map(|c| c.lines())
414                    .map(|l| {
415                        l.trim()
416                            .trim_start_matches("///")
417                            .trim_start_matches("//!")
418                            .trim_start_matches("//")
419                            .trim_start_matches("/**")
420                            .trim_start_matches("/*")
421                            .trim_end_matches("*/")
422                            .trim_start_matches('*')
423                            .trim()
424                            .to_string()
425                    })
426                    .filter(|l| !l.is_empty())
427                    .collect();
428                if cleaned.is_empty() {
429                    None
430                } else {
431                    Some(cleaned.join(" ").chars().take(300).collect())
432                }
433            }
434        }
435    }
436}
437
438/// `impl Foo`, `impl Foo<T>`, `impl Trait for Foo<T>` → "Foo".
439fn base_type_name(ty: Node, src: &str) -> String {
440    match ty.kind() {
441        "generic_type" => ty
442            .child_by_field_name("type")
443            .map(|t| src[t.byte_range()].to_string())
444            .unwrap_or_else(|| src[ty.byte_range()].to_string()),
445        _ => src[ty.byte_range()].to_string(),
446    }
447}
448
449/// Go receiver `(s *Server)` → "Server".
450fn receiver_type(receiver: Node, src: &str) -> Option<String> {
451    let mut cursor = receiver.walk();
452    for child in receiver.children(&mut cursor) {
453        if child.kind() == "parameter_declaration" {
454            let ty = child.child_by_field_name("type")?;
455            let base = match ty.kind() {
456                "pointer_type" => ty.named_child(0)?,
457                _ => ty,
458            };
459            return Some(src[base.byte_range()].to_string());
460        }
461    }
462    None
463}
464
465/// Rust use-tree walker: `use a::{b::C, d as E};` → C←a::b::C, E←a::d.
466fn rust_use_tree(node: Node, src: &str, prefix: &str, out: &mut Vec<ImportRef>) {
467    let text = |n: Node| src[n.byte_range()].to_string();
468    let join = |prefix: &str, seg: &str| {
469        if prefix.is_empty() {
470            seg.to_string()
471        } else {
472            format!("{prefix}::{seg}")
473        }
474    };
475    match node.kind() {
476        "identifier" | "crate" | "self" | "super" => {
477            let seg = text(node);
478            out.push(ImportRef {
479                local: seg.clone(),
480                source: join(prefix, &seg),
481            });
482        }
483        "scoped_identifier" => {
484            let full = join(prefix, &text(node));
485            let local = node
486                .child_by_field_name("name")
487                .map(text)
488                .unwrap_or_default();
489            if !local.is_empty() {
490                out.push(ImportRef {
491                    local,
492                    source: full,
493                });
494            }
495        }
496        "use_as_clause" => {
497            let alias = node.child_by_field_name("alias").map(text);
498            let path = node.child_by_field_name("path").map(text);
499            if let (Some(alias), Some(path)) = (alias, path) {
500                out.push(ImportRef {
501                    local: alias,
502                    source: join(prefix, &path),
503                });
504            }
505        }
506        "scoped_use_list" => {
507            let new_prefix = node
508                .child_by_field_name("path")
509                .map(|p| join(prefix, &text(p)))
510                .unwrap_or_else(|| prefix.to_string());
511            if let Some(list) = node.child_by_field_name("list") {
512                let mut cursor = list.walk();
513                for child in list.named_children(&mut cursor) {
514                    rust_use_tree(child, src, &new_prefix, out);
515                }
516            }
517        }
518        "use_list" => {
519            let mut cursor = node.walk();
520            for child in node.named_children(&mut cursor) {
521                rust_use_tree(child, src, prefix, out);
522            }
523        }
524        // use_wildcard and attributes: nothing useful to bind.
525        _ => {}
526    }
527}