// lean_ctx/core/language_capabilities.rs

/// Identifies a source language recognized by this module.
///
/// Each variant has an identifier string (see `LanguageId::id_str`) and a
/// capability profile (see `capabilities`). Variant order is kept stable so
/// the implicit discriminants do not change.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LanguageId {
    Rust,
    TypeScript,
    JavaScript,
    Python,
    Go,
    Java,
    C,
    Cpp,
    Ruby,
    CSharp,
    Kotlin,
    Swift,
    Php,
    Bash,
    Dart,
    Scala,
    Elixir,
    Zig,
    // The two variants below are treated as templating languages by
    // `capabilities` (dependency edges only).
    Vue,
    Svelte,
}
24
/// Feature flags describing what can be done for a given language.
///
/// Values are produced by `capabilities`; the struct is a plain `Copy` value.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LanguageCapabilities {
    /// Dependency edges can be extracted for the language.
    pub deps_edges: bool,
    /// Deep queries are available (enabled for the tree-sitter backed
    /// languages in `capabilities`).
    pub deep_queries: bool,
    /// An import resolver is available (enabled for the tree-sitter backed
    /// languages in `capabilities`).
    pub import_resolver: bool,
}
31
32impl LanguageId {
33    pub fn id_str(&self) -> &'static str {
34        match self {
35            LanguageId::Rust => "rust",
36            LanguageId::TypeScript => "typescript",
37            LanguageId::JavaScript => "javascript",
38            LanguageId::Python => "python",
39            LanguageId::Go => "go",
40            LanguageId::Java => "java",
41            LanguageId::C => "c",
42            LanguageId::Cpp => "cpp",
43            LanguageId::Ruby => "ruby",
44            LanguageId::CSharp => "csharp",
45            LanguageId::Kotlin => "kotlin",
46            LanguageId::Swift => "swift",
47            LanguageId::Php => "php",
48            LanguageId::Bash => "bash",
49            LanguageId::Dart => "dart",
50            LanguageId::Scala => "scala",
51            LanguageId::Elixir => "elixir",
52            LanguageId::Zig => "zig",
53            LanguageId::Vue => "vue",
54            LanguageId::Svelte => "svelte",
55        }
56    }
57}
58
59pub fn capabilities(lang: LanguageId) -> LanguageCapabilities {
60    match lang {
61        // tree-sitter backed (deep_queries + resolver can be meaningful)
62        LanguageId::Rust
63        | LanguageId::TypeScript
64        | LanguageId::JavaScript
65        | LanguageId::Python
66        | LanguageId::Go
67        | LanguageId::Java
68        | LanguageId::C
69        | LanguageId::Cpp
70        | LanguageId::Ruby
71        | LanguageId::CSharp
72        | LanguageId::Kotlin
73        | LanguageId::Swift
74        | LanguageId::Php
75        | LanguageId::Bash
76        | LanguageId::Dart
77        | LanguageId::Scala
78        | LanguageId::Elixir
79        | LanguageId::Zig => LanguageCapabilities {
80            deps_edges: true,
81            deep_queries: true,
82            import_resolver: true,
83        },
84        // templating languages: we can extract deps edges, but no deep_queries/resolver.
85        LanguageId::Vue | LanguageId::Svelte => LanguageCapabilities {
86            deps_edges: true,
87            deep_queries: false,
88            import_resolver: false,
89        },
90    }
91}
92
93pub fn language_for_ext(ext: &str) -> Option<LanguageId> {
94    let e = ext.trim().trim_start_matches('.').to_lowercase();
95    match e.as_str() {
96        "rs" => Some(LanguageId::Rust),
97        "ts" | "tsx" => Some(LanguageId::TypeScript),
98        "js" | "jsx" => Some(LanguageId::JavaScript),
99        "py" => Some(LanguageId::Python),
100        "go" => Some(LanguageId::Go),
101        "java" => Some(LanguageId::Java),
102        "c" | "h" => Some(LanguageId::C),
103        "cpp" | "cc" | "cxx" | "hpp" | "hxx" | "hh" => Some(LanguageId::Cpp),
104        "rb" => Some(LanguageId::Ruby),
105        "cs" => Some(LanguageId::CSharp),
106        "kt" | "kts" => Some(LanguageId::Kotlin),
107        "swift" => Some(LanguageId::Swift),
108        "php" => Some(LanguageId::Php),
109        "sh" | "bash" => Some(LanguageId::Bash),
110        "dart" => Some(LanguageId::Dart),
111        "scala" | "sc" => Some(LanguageId::Scala),
112        "ex" | "exs" => Some(LanguageId::Elixir),
113        "zig" => Some(LanguageId::Zig),
114        "vue" => Some(LanguageId::Vue),
115        "svelte" => Some(LanguageId::Svelte),
116        _ => None,
117    }
118}
119
120pub fn language_for_path(path: &str) -> Option<LanguageId> {
121    std::path::Path::new(path)
122        .extension()
123        .and_then(|e| e.to_str())
124        .and_then(language_for_ext)
125}
126
127pub fn is_indexable_ext(ext: &str) -> bool {
128    language_for_ext(ext).is_some()
129}
130
131/// Every language the property graph / code-map can index, for capability
132/// enumeration and UI hints. Keep in sync with `language_for_ext`.
133pub const ALL_LANGUAGES: &[LanguageId] = &[
134    LanguageId::Rust,
135    LanguageId::TypeScript,
136    LanguageId::JavaScript,
137    LanguageId::Python,
138    LanguageId::Go,
139    LanguageId::Java,
140    LanguageId::C,
141    LanguageId::Cpp,
142    LanguageId::Ruby,
143    LanguageId::CSharp,
144    LanguageId::Kotlin,
145    LanguageId::Swift,
146    LanguageId::Php,
147    LanguageId::Bash,
148    LanguageId::Dart,
149    LanguageId::Scala,
150    LanguageId::Elixir,
151    LanguageId::Zig,
152    LanguageId::Vue,
153    LanguageId::Svelte,
154];
155
156/// Friendly names of every graph-indexable language (e.g. for an empty-graph hint).
157pub fn graph_supported_language_names() -> Vec<&'static str> {
158    ALL_LANGUAGES.iter().map(LanguageId::id_str).collect()
159}
160
/// Gives the human-readable name of a *programming language* that lean-ctx
/// recognizes by extension but does **not** graph-index.
///
/// The extension is normalized like in `language_for_ext` (trimmed, leading
/// dots removed, lowercased). Returns `None` for graph-indexed languages and
/// for non-code files (docs, data, config). Used only to explain an empty
/// graph — e.g. a Lua/Luau project (#360).
fn unsupported_source_language_name(ext: &str) -> Option<&'static str> {
    // Each row pairs a group of extensions with the display name they share.
    const NAMED_EXTENSIONS: &[(&[&str], &str)] = &[
        (&["lua"], "Lua"),
        (&["luau"], "Luau"),
        (&["r"], "R"),
        (&["jl"], "Julia"),
        (&["nim"], "Nim"),
        (&["cr"], "Crystal"),
        (&["clj", "cljs", "cljc"], "Clojure"),
        (&["erl", "hrl"], "Erlang"),
        (&["hs"], "Haskell"),
        (&["ml", "mli"], "OCaml"),
        (&["fs", "fsx"], "F#"),
        (&["pl", "pm"], "Perl"),
        (&["groovy", "gradle"], "Groovy"),
        (&["tf"], "Terraform"),
        (&["sol"], "Solidity"),
        (&["f90", "f95", "f03"], "Fortran"),
        (&["pas"], "Pascal"),
        (&["d"], "D"),
        (&["sql"], "SQL"),
        (&["tcl"], "Tcl"),
        (&["raku", "rakumod"], "Raku"),
    ];

    let normalized = ext.trim().trim_start_matches('.').to_lowercase();
    let key = normalized.as_str();
    NAMED_EXTENSIONS
        .iter()
        .find(|(extensions, _)| extensions.iter().any(|known| *known == key))
        .map(|&(_, name)| name)
}
191
192/// Bounded project scan returning programming languages present in `root` that
193/// lean-ctx does **not** graph-index, with file counts (descending, capped to 5).
194/// Honors .gitignore/hidden like the graph walker and stops after `max_entries`
195/// filesystem entries. Lets the dashboard turn a confusing empty graph into a
196/// clear "Lua is not graph-indexed" message instead of an endless loading state.
197pub fn scan_unsupported_source_languages(root: &str, max_entries: usize) -> Vec<(String, usize)> {
198    let mut counts: std::collections::HashMap<&'static str, usize> =
199        std::collections::HashMap::new();
200    let walker = ignore::WalkBuilder::new(root)
201        .hidden(true)
202        .git_ignore(true)
203        .git_global(true)
204        .git_exclude(true)
205        .max_depth(Some(20))
206        .build();
207    for entry in walker.flatten().take(max_entries) {
208        if !entry.file_type().is_some_and(|ft| ft.is_file()) {
209            continue;
210        }
211        let ext = entry
212            .path()
213            .extension()
214            .and_then(|e| e.to_str())
215            .unwrap_or("");
216        if let Some(name) = unsupported_source_language_name(ext) {
217            *counts.entry(name).or_default() += 1;
218        }
219    }
220    let mut ranked: Vec<(String, usize)> = counts
221        .into_iter()
222        .map(|(k, c)| (k.to_string(), c))
223        .collect();
224    ranked.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
225    ranked.truncate(5);
226    ranked
227}
228
#[cfg(test)]
mod tests {
    use super::*;

    /// Every extension `language_for_ext` is expected to recognize. Update
    /// this list together with the match table in `language_for_ext`.
    const INDEXED_EXTS: &[&str] = &[
        "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "c", "h", "cpp", "cc", "cxx", "hpp",
        "hxx", "hh", "rb", "cs", "kt", "kts", "swift", "php", "sh", "bash", "dart", "scala", "sc",
        "ex", "exs", "zig", "vue", "svelte",
    ];

    #[test]
    fn ext_mapping_basic() {
        assert_eq!(language_for_ext("rs"), Some(LanguageId::Rust));
        assert_eq!(language_for_ext(".tsx"), Some(LanguageId::TypeScript));
        assert_eq!(language_for_ext("JS"), Some(LanguageId::JavaScript));
        assert_eq!(language_for_ext("hxx"), Some(LanguageId::Cpp));
        assert_eq!(language_for_ext("exs"), Some(LanguageId::Elixir));
        assert_eq!(language_for_ext("unknown"), None);
    }

    #[test]
    fn path_mapping_uses_extension() {
        assert_eq!(language_for_path("src/main.rs"), Some(LanguageId::Rust));
        assert_eq!(language_for_path("web/App.TSX"), Some(LanguageId::TypeScript));
        assert_eq!(language_for_path("Makefile"), None);
        assert_eq!(language_for_path("notes.md"), None);
    }

    #[test]
    fn indexable_ext_true_for_known() {
        assert!(is_indexable_ext("rs"));
        assert!(is_indexable_ext("vue"));
        assert!(!is_indexable_ext("md"));
    }

    #[test]
    fn caps_are_deterministic() {
        let c1 = capabilities(LanguageId::Rust);
        let c2 = capabilities(LanguageId::Rust);
        assert_eq!(c1, c2);
        assert!(c1.deps_edges);
        // Templating languages expose dependency edges only.
        let vue = capabilities(LanguageId::Vue);
        assert!(vue.deps_edges);
        assert!(!vue.deep_queries);
        assert!(!vue.import_resolver);
    }

    #[test]
    fn all_languages_match_ext_table() {
        // Every enumerated language must be reachable via at least one extension,
        // so the UI's "supported languages" list never drifts from reality.
        for lang in ALL_LANGUAGES {
            let reachable = INDEXED_EXTS
                .iter()
                .any(|ext| language_for_ext(ext) == Some(*lang));
            assert!(reachable, "no extension maps to {}", lang.id_str());
        }
        // Conversely, every known extension must resolve to an enumerated language.
        for ext in INDEXED_EXTS {
            match language_for_ext(ext) {
                Some(lang) => assert!(
                    ALL_LANGUAGES.contains(&lang),
                    "{} is missing from ALL_LANGUAGES",
                    lang.id_str()
                ),
                None => panic!("extension {} is not recognized", ext),
            }
        }
        // The enumeration must not list a language twice.
        let unique: std::collections::HashSet<LanguageId> =
            ALL_LANGUAGES.iter().copied().collect();
        assert_eq!(unique.len(), ALL_LANGUAGES.len());

        let names = graph_supported_language_names();
        for lang in ALL_LANGUAGES {
            assert!(names.contains(&lang.id_str()));
        }
        assert!(names.contains(&"rust"));
        assert_eq!(ALL_LANGUAGES.len(), names.len());
    }

    #[test]
    fn unsupported_source_languages_named_but_not_indexed() {
        // Lua/Luau (issue #360) are recognized as code yet never graph-indexed.
        assert_eq!(unsupported_source_language_name("lua"), Some("Lua"));
        assert_eq!(unsupported_source_language_name(".luau"), Some("Luau"));
        assert!(!is_indexable_ext("lua"));
        assert!(!is_indexable_ext("luau"));
        // Graph-indexed languages and plain data/docs are not reported as "unsupported code".
        assert_eq!(unsupported_source_language_name("rs"), None);
        assert_eq!(unsupported_source_language_name("md"), None);
        assert_eq!(unsupported_source_language_name("json"), None);
    }

    #[test]
    fn scan_reports_lua_project() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("init.lua"), "local x = 1").unwrap();
        std::fs::write(dir.path().join("mod.luau"), "return {}").unwrap();
        std::fs::write(dir.path().join("README.md"), "# docs").unwrap();
        let found = scan_unsupported_source_languages(&dir.path().to_string_lossy(), 1000);
        let names: Vec<&str> = found.iter().map(|(n, _)| n.as_str()).collect();
        assert!(names.contains(&"Lua"));
        assert!(names.contains(&"Luau"));
    }
}