Skip to main content

lvz_context/
lang.rs

1//! Language detection and per-language tree-sitter configuration.
2//!
3//! Each supported [`Lang`] knows its tree-sitter grammar and which definition nodes carry
4//! an elidable `body` field — the knobs the skeletoniser (`super::skeleton`) needs to keep
5//! signatures while dropping bodies.
6
7use tree_sitter::Language as TsLanguage;
8
/// A source language Lavoisier can parse for skeletons and symbols.
///
/// The type is a plain `Copy` tag. It derives `Hash` alongside `Eq` so a language can be used
/// as a `HashMap`/`HashSet` key (for example to group files or cache per-language state).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Lang {
    /// Rust sources (`.rs`).
    Rust,
    /// Python sources and stubs (`.py`, `.pyi`).
    Python,
    /// JavaScript sources (`.js`, `.jsx`, `.mjs`, `.cjs`).
    JavaScript,
    /// TypeScript sources (`.ts`, `.tsx`, `.mts`, `.cts`).
    TypeScript,
}
17
/// How to elide bodies for a language.
///
/// Every field is either a `'static` table, a `'static` string or a `bool`, so the spec derives
/// the same cheap value-type traits as [`Lang`]: it can be copied, compared and printed in
/// diagnostics or test failures.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct LangSpec {
    /// Definition node kinds whose `body` field should be replaced by [`elision`](Self::elision).
    pub def_kinds: &'static [&'static str],
    /// Placeholder text the body is replaced with (kept syntactically suggestive, not valid).
    pub elision: &'static str,
    /// Named-definition node kinds that become nodes in the symbol-dependency graph.
    pub symbol_kinds: &'static [&'static str],
    /// Keep a leading docstring (a bare string statement as the body's first item) when eliding
    /// the rest of the body — §6.1 wants docstrings retained as high-signal context.
    /// True only for Python, whose `def` bodies conventionally open with a `"""…"""` docstring.
    pub keeps_docstring: bool,
    /// Leaf node kinds that count as a *reference* to a name when resolving symbol-dependency
    /// edges (`super::symbols`). Restricting to real identifier nodes (vs. raw substring search)
    /// is what makes a name in a string or comment stop creating a spurious edge.
    pub ref_ident_kinds: &'static [&'static str],
    /// Node kinds that introduce **local bindings** (parameters, `let`/variable declarations).
    /// The identifiers in their binding position (`pattern`/`name` field, else direct children)
    /// are locals, so they are excluded from references — this is the scope/shadowing fix: a local
    /// variable that happens to share a top-level symbol's name no longer links to that symbol.
    pub binder_kinds: &'static [&'static str],
}
40
41impl Lang {
42    /// Best-effort detection from a file path's extension.
43    pub fn from_path(path: &str) -> Option<Lang> {
44        let ext = path.rsplit('.').next()?.to_ascii_lowercase();
45        Some(match ext.as_str() {
46            "rs" => Lang::Rust,
47            "py" | "pyi" => Lang::Python,
48            "js" | "jsx" | "mjs" | "cjs" => Lang::JavaScript,
49            "ts" | "tsx" | "mts" | "cts" => Lang::TypeScript,
50            _ => return None,
51        })
52    }
53
54    /// The tree-sitter grammar for this language.
55    pub(crate) fn ts_language(self) -> TsLanguage {
56        match self {
57            Lang::Rust => tree_sitter_rust::LANGUAGE.into(),
58            Lang::Python => tree_sitter_python::LANGUAGE.into(),
59            Lang::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
60            Lang::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
61        }
62    }
63
64    pub(crate) fn spec(self) -> LangSpec {
65        match self {
66            Lang::Rust => LangSpec {
67                def_kinds: &["function_item"],
68                elision: "{ … }",
69                symbol_kinds: &[
70                    "function_item",
71                    "struct_item",
72                    "enum_item",
73                    "trait_item",
74                    "type_item",
75                    "const_item",
76                    "static_item",
77                ],
78                keeps_docstring: false,
79                ref_ident_kinds: &["identifier", "type_identifier"],
80                binder_kinds: &["parameter", "let_declaration", "closure_parameters"],
81            },
82            Lang::Python => LangSpec {
83                def_kinds: &["function_definition"],
84                elision: "...",
85                symbol_kinds: &["function_definition", "class_definition"],
86                keeps_docstring: true,
87                ref_ident_kinds: &["identifier"],
88                binder_kinds: &["parameters", "lambda_parameters"],
89            },
90            Lang::JavaScript => LangSpec {
91                def_kinds: &[
92                    "function_declaration",
93                    "method_definition",
94                    "function_expression",
95                ],
96                elision: "{ … }",
97                symbol_kinds: &[
98                    "function_declaration",
99                    "method_definition",
100                    "class_declaration",
101                ],
102                keeps_docstring: false,
103                ref_ident_kinds: &["identifier"],
104                binder_kinds: &["formal_parameters", "variable_declarator"],
105            },
106            Lang::TypeScript => LangSpec {
107                def_kinds: &[
108                    "function_declaration",
109                    "method_definition",
110                    "function_expression",
111                ],
112                elision: "{ … }",
113                symbol_kinds: &[
114                    "function_declaration",
115                    "method_definition",
116                    "class_declaration",
117                    "interface_declaration",
118                    "type_alias_declaration",
119                ],
120                keeps_docstring: false,
121                ref_ident_kinds: &["identifier", "type_identifier"],
122                binder_kinds: &[
123                    "formal_parameters",
124                    "variable_declarator",
125                    "required_parameter",
126                    "optional_parameter",
127                ],
128            },
129        }
130    }
131}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// Known extensions map to their language (case-insensitively); everything else is `None`.
    #[test]
    fn detects_languages_by_extension() {
        let cases = [
            ("src/main.rs", Some(Lang::Rust)),
            ("a/b/c.py", Some(Lang::Python)),
            ("x.JSX", Some(Lang::JavaScript)),
            ("x.tsx", Some(Lang::TypeScript)),
            ("README.md", None),
            ("noext", None),
        ];

        for &(path, expected) in &cases {
            assert_eq!(Lang::from_path(path), expected, "path: {}", path);
        }
    }
}