Skip to main content

mcp_memory/code/
lang.rs

//! Language registry for code-symbol indexing.
//!
//! Maps file extensions to a [`Lang`] and lazily builds the per-language
//! [`TagsConfiguration`]s (from each grammar's bundled `tags.scm`). Building a
//! tags configuration compiles a tree-sitter query, so all configs are built
//! once on first use, cached in a process-wide `OnceLock`, and shared across
//! every parsed file.
7
8use std::collections::HashMap;
9use std::path::Path;
10use std::sync::OnceLock;
11
12use tree_sitter_tags::TagsConfiguration;
13
/// Languages for which code symbols can be extracted.
///
/// The value is a plain, copyable tag: it carries no data and can be used
/// directly as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Lang {
    /// Rust.
    Rust,
    /// Python.
    Python,
    /// JavaScript.
    JavaScript,
    /// TypeScript without JSX syntax.
    TypeScript,
    /// TypeScript with JSX syntax; a separate variant from
    /// [`Lang::TypeScript`] so it can be handled on its own.
    Tsx,
    /// Go.
    Go,
    /// Java.
    Java,
    /// C.
    C,
    /// C++.
    Cpp,
    /// Ruby.
    Ruby,
    /// PHP.
    Php,
}
29
30impl Lang {
31    /// Stable lowercase identifier stored in the graph (`lang:` observation).
32    pub const fn name(self) -> &'static str {
33        match self {
34            Lang::Rust => "rust",
35            Lang::Python => "python",
36            Lang::JavaScript => "javascript",
37            Lang::TypeScript => "typescript",
38            Lang::Tsx => "tsx",
39            Lang::Go => "go",
40            Lang::Java => "java",
41            Lang::C => "c",
42            Lang::Cpp => "cpp",
43            Lang::Ruby => "ruby",
44            Lang::Php => "php",
45        }
46    }
47
48    fn language(self) -> tree_sitter::Language {
49        match self {
50            Lang::Rust => tree_sitter_rust::LANGUAGE.into(),
51            Lang::Python => tree_sitter_python::LANGUAGE.into(),
52            Lang::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
53            Lang::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
54            Lang::Tsx => tree_sitter_typescript::LANGUAGE_TSX.into(),
55            Lang::Go => tree_sitter_go::LANGUAGE.into(),
56            Lang::Java => tree_sitter_java::LANGUAGE.into(),
57            Lang::C => tree_sitter_c::LANGUAGE.into(),
58            Lang::Cpp => tree_sitter_cpp::LANGUAGE.into(),
59            Lang::Ruby => tree_sitter_ruby::LANGUAGE.into(),
60            Lang::Php => tree_sitter_php::LANGUAGE_PHP.into(),
61        }
62    }
63
64    const fn tags_query(self) -> &'static str {
65        match self {
66            Lang::Rust => tree_sitter_rust::TAGS_QUERY,
67            Lang::Python => tree_sitter_python::TAGS_QUERY,
68            Lang::JavaScript => tree_sitter_javascript::TAGS_QUERY,
69            Lang::TypeScript | Lang::Tsx => tree_sitter_typescript::TAGS_QUERY,
70            Lang::Go => tree_sitter_go::TAGS_QUERY,
71            Lang::Java => tree_sitter_java::TAGS_QUERY,
72            Lang::C => tree_sitter_c::TAGS_QUERY,
73            Lang::Cpp => tree_sitter_cpp::TAGS_QUERY,
74            Lang::Ruby => tree_sitter_ruby::TAGS_QUERY,
75            Lang::Php => tree_sitter_php::TAGS_QUERY,
76        }
77    }
78
79    pub(crate) const fn all() -> [Lang; 11] {
80        [
81            Lang::Rust,
82            Lang::Python,
83            Lang::JavaScript,
84            Lang::TypeScript,
85            Lang::Tsx,
86            Lang::Go,
87            Lang::Java,
88            Lang::C,
89            Lang::Cpp,
90            Lang::Ruby,
91            Lang::Php,
92        ]
93    }
94}
95
96/// Resolve a path's extension to a [`Lang`], or `None` if unsupported.
97pub fn detect(path: &Path) -> Option<Lang> {
98    let ext = path.extension()?.to_str()?.to_ascii_lowercase();
99    Some(match ext.as_str() {
100        "rs" => Lang::Rust,
101        "py" | "pyi" => Lang::Python,
102        "js" | "jsx" | "mjs" | "cjs" => Lang::JavaScript,
103        "ts" | "mts" | "cts" => Lang::TypeScript,
104        "tsx" => Lang::Tsx,
105        "go" => Lang::Go,
106        "java" => Lang::Java,
107        "c" | "h" => Lang::C,
108        "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Lang::Cpp,
109        "rb" => Lang::Ruby,
110        "php" | "phtml" | "php3" | "php4" | "php5" => Lang::Php,
111        _ => return None,
112    })
113}
114
115/// Process-wide cache of compiled tags configurations, built on first use.
116/// Languages whose query fails to compile are simply absent (and skipped).
117fn configs() -> &'static HashMap<Lang, TagsConfiguration> {
118    static CONFIGS: OnceLock<HashMap<Lang, TagsConfiguration>> = OnceLock::new();
119    CONFIGS.get_or_init(|| {
120        let mut m = HashMap::new();
121        for lang in Lang::all() {
122            match TagsConfiguration::new(lang.language(), lang.tags_query(), "") {
123                Ok(cfg) => {
124                    m.insert(lang, cfg);
125                }
126                Err(e) => {
127                    tracing::warn!("code: tags config for {} failed: {e}", lang.name());
128                }
129            }
130        }
131        m
132    })
133}
134
135/// The compiled tags configuration for a language, if available.
136pub fn config(lang: Lang) -> Option<&'static TagsConfiguration> {
137    configs().get(&lang)
138}