Skip to main content

mcp_memory/code/
lang.rs

1//! Language registry for code-symbol indexing.
2//!
3//! Maps file extensions to a [`Lang`] and lazily builds the per-language
4//! [`TagsConfiguration`] (from each grammar's bundled `tags.scm`). Building a
5//! tags configuration compiles a tree-sitter query, so the configs are cached
6//! in a process-wide `OnceLock` and shared across every parsed file.
7
8use std::collections::HashMap;
9use std::path::Path;
10use std::sync::OnceLock;
11
12use tree_sitter_tags::TagsConfiguration;
13
/// A programming language whose source files can be indexed for symbols.
///
/// The type is a plain `Copy` tag: it is cheap to pass by value and is usable
/// as a `HashMap` key (it derives `Eq` + `Hash`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Lang {
    /// Rust sources.
    Rust,
    /// Python sources.
    Python,
    /// JavaScript sources.
    JavaScript,
    /// TypeScript sources (non-JSX flavour).
    TypeScript,
    /// TypeScript with JSX syntax; kept separate from [`Lang::TypeScript`].
    Tsx,
    /// Go sources.
    Go,
    /// Java sources.
    Java,
}
25
26impl Lang {
27    /// Stable lowercase identifier stored in the graph (`lang:` observation).
28    pub const fn name(self) -> &'static str {
29        match self {
30            Lang::Rust => "rust",
31            Lang::Python => "python",
32            Lang::JavaScript => "javascript",
33            Lang::TypeScript => "typescript",
34            Lang::Tsx => "tsx",
35            Lang::Go => "go",
36            Lang::Java => "java",
37        }
38    }
39
40    fn language(self) -> tree_sitter::Language {
41        match self {
42            Lang::Rust => tree_sitter_rust::LANGUAGE.into(),
43            Lang::Python => tree_sitter_python::LANGUAGE.into(),
44            Lang::JavaScript => tree_sitter_javascript::LANGUAGE.into(),
45            Lang::TypeScript => tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
46            Lang::Tsx => tree_sitter_typescript::LANGUAGE_TSX.into(),
47            Lang::Go => tree_sitter_go::LANGUAGE.into(),
48            Lang::Java => tree_sitter_java::LANGUAGE.into(),
49        }
50    }
51
52    const fn tags_query(self) -> &'static str {
53        match self {
54            Lang::Rust => tree_sitter_rust::TAGS_QUERY,
55            Lang::Python => tree_sitter_python::TAGS_QUERY,
56            Lang::JavaScript => tree_sitter_javascript::TAGS_QUERY,
57            Lang::TypeScript | Lang::Tsx => tree_sitter_typescript::TAGS_QUERY,
58            Lang::Go => tree_sitter_go::TAGS_QUERY,
59            Lang::Java => tree_sitter_java::TAGS_QUERY,
60        }
61    }
62
63    const fn all() -> [Lang; 7] {
64        [
65            Lang::Rust,
66            Lang::Python,
67            Lang::JavaScript,
68            Lang::TypeScript,
69            Lang::Tsx,
70            Lang::Go,
71            Lang::Java,
72        ]
73    }
74}
75
76/// Resolve a path's extension to a [`Lang`], or `None` if unsupported.
77pub fn detect(path: &Path) -> Option<Lang> {
78    let ext = path.extension()?.to_str()?.to_ascii_lowercase();
79    Some(match ext.as_str() {
80        "rs" => Lang::Rust,
81        "py" | "pyi" => Lang::Python,
82        "js" | "jsx" | "mjs" | "cjs" => Lang::JavaScript,
83        "ts" | "mts" | "cts" => Lang::TypeScript,
84        "tsx" => Lang::Tsx,
85        "go" => Lang::Go,
86        "java" => Lang::Java,
87        _ => return None,
88    })
89}
90
91/// Process-wide cache of compiled tags configurations, built on first use.
92/// Languages whose query fails to compile are simply absent (and skipped).
93fn configs() -> &'static HashMap<Lang, TagsConfiguration> {
94    static CONFIGS: OnceLock<HashMap<Lang, TagsConfiguration>> = OnceLock::new();
95    CONFIGS.get_or_init(|| {
96        let mut m = HashMap::new();
97        for lang in Lang::all() {
98            match TagsConfiguration::new(lang.language(), lang.tags_query(), "") {
99                Ok(cfg) => {
100                    m.insert(lang, cfg);
101                }
102                Err(e) => {
103                    tracing::warn!("code: tags config for {} failed: {e}", lang.name());
104                }
105            }
106        }
107        m
108    })
109}
110
111/// The compiled tags configuration for a language, if available.
112pub fn config(lang: Lang) -> Option<&'static TagsConfiguration> {
113    configs().get(&lang)
114}