Skip to main content

code_analyze_core/
lang.rs

1// SPDX-FileCopyrightText: 2026 code-analyze-mcp contributors
2// SPDX-License-Identifier: Apache-2.0
3//! Language detection by file extension.
4//!
5//! Maps file extensions to supported language identifiers.
6
/// Lookup table pairing a lowercase file extension (written without a leading dot)
/// with the language identifier it resolves to.
///
/// Every row is gated on the Cargo feature of its language, so a build only contains
/// rows for the languages it was compiled with. Rows are grouped by feature; each
/// extension appears exactly once, so row order has no effect on lookups.
const EXTENSION_MAP: &[(&str, &str)] = &[
    // C and C++ share the `lang-cpp` feature. Only `c` resolves to "c"; headers and
    // every other extension in this group resolve to "cpp".
    #[cfg(feature = "lang-cpp")]
    ("c", "c"),
    #[cfg(feature = "lang-cpp")]
    ("cc", "cpp"),
    #[cfg(feature = "lang-cpp")]
    ("cpp", "cpp"),
    #[cfg(feature = "lang-cpp")]
    ("cxx", "cpp"),
    #[cfg(feature = "lang-cpp")]
    ("h", "cpp"),
    #[cfg(feature = "lang-cpp")]
    ("hpp", "cpp"),
    #[cfg(feature = "lang-cpp")]
    ("hxx", "cpp"),
    // C#
    #[cfg(feature = "lang-csharp")]
    ("cs", "csharp"),
    // Fortran (fixed- and free-form variants)
    #[cfg(feature = "lang-fortran")]
    ("f", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f03", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f08", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f77", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f90", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f95", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("for", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("ftn", "fortran"),
    // Go
    #[cfg(feature = "lang-go")]
    ("go", "go"),
    // Java
    #[cfg(feature = "lang-java")]
    ("java", "java"),
    // JavaScript (script, CommonJS and ES-module extensions)
    #[cfg(feature = "lang-javascript")]
    ("cjs", "javascript"),
    #[cfg(feature = "lang-javascript")]
    ("js", "javascript"),
    #[cfg(feature = "lang-javascript")]
    ("mjs", "javascript"),
    // Python
    #[cfg(feature = "lang-python")]
    ("py", "python"),
    // Rust
    #[cfg(feature = "lang-rust")]
    ("rs", "rust"),
    // TypeScript and TSX are separate features with separate identifiers.
    #[cfg(feature = "lang-typescript")]
    ("ts", "typescript"),
    #[cfg(feature = "lang-tsx")]
    ("tsx", "tsx"),
];
59
60/// Returns the language identifier for the given file extension, or `None` if unsupported.
61///
62/// The lookup is case-insensitive. Supported extensions include `rs`, `py`, `go`, `java`,
63/// `ts`, `tsx`, `f90`, `f95`, `for`, `ftn`, and other Fortran variants, as well as
64/// `js`, `mjs`, `cjs`, `c`, `cc`, `cpp`, `cxx`, `h`, `hpp`, `hxx`, and `cs`.
65#[must_use]
66pub fn language_for_extension(ext: &str) -> Option<&'static str> {
67    EXTENSION_MAP
68        .iter()
69        .find(|(e, _)| e.eq_ignore_ascii_case(ext))
70        .map(|(_, lang)| *lang)
71}
72
/// Lists the identifier of every language this build was compiled with.
///
/// Each identifier is included only when its Cargo feature is enabled, so the slice
/// is empty when no language feature is active. The `lang-cpp` feature contributes
/// both `"c"` and `"cpp"`. The relative order of the entries is fixed.
#[must_use]
pub fn supported_languages() -> &'static [&'static str] {
    // A named constant makes the `'static` lifetime of the table explicit.
    const LANGUAGES: &[&str] = &[
        #[cfg(feature = "lang-rust")]
        "rust",
        #[cfg(feature = "lang-go")]
        "go",
        #[cfg(feature = "lang-java")]
        "java",
        #[cfg(feature = "lang-python")]
        "python",
        #[cfg(feature = "lang-typescript")]
        "typescript",
        #[cfg(feature = "lang-tsx")]
        "tsx",
        #[cfg(feature = "lang-javascript")]
        "javascript",
        #[cfg(feature = "lang-fortran")]
        "fortran",
        #[cfg(feature = "lang-cpp")]
        "c",
        #[cfg(feature = "lang-cpp")]
        "cpp",
        #[cfg(feature = "lang-csharp")]
        "csharp",
    ];
    LANGUAGES
}
104
#[cfg(test)]
mod tests {
    use super::*;

    /// Every mapped extension resolves to its language when the owning feature is on.
    #[test]
    fn test_language_for_extension_happy_path() {
        #[cfg(feature = "lang-rust")]
        assert_eq!(language_for_extension("rs"), Some("rust"));
        #[cfg(feature = "lang-python")]
        assert_eq!(language_for_extension("py"), Some("python"));
        #[cfg(feature = "lang-go")]
        assert_eq!(language_for_extension("go"), Some("go"));
        #[cfg(feature = "lang-java")]
        assert_eq!(language_for_extension("java"), Some("java"));
        #[cfg(feature = "lang-typescript")]
        assert_eq!(language_for_extension("ts"), Some("typescript"));
        #[cfg(feature = "lang-tsx")]
        assert_eq!(language_for_extension("tsx"), Some("tsx"));
        #[cfg(feature = "lang-csharp")]
        assert_eq!(language_for_extension("cs"), Some("csharp"));
        #[cfg(feature = "lang-javascript")]
        assert_eq!(language_for_extension("js"), Some("javascript"));
        #[cfg(feature = "lang-javascript")]
        assert_eq!(language_for_extension("mjs"), Some("javascript"));
        #[cfg(feature = "lang-javascript")]
        assert_eq!(language_for_extension("cjs"), Some("javascript"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("f"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("f03"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("f08"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("f77"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("f90"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("f95"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("for"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("ftn"), Some("fortran"));
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("c"), Some("c"));
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("cpp"), Some("cpp"));
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("cc"), Some("cpp"));
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("cxx"), Some("cpp"));
        // Headers are deliberately classified as C++, not C.
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("h"), Some("cpp"));
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("hpp"), Some("cpp"));
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("hxx"), Some("cpp"));
    }

    /// Unknown and empty inputs yield `None`; matching ignores ASCII case.
    #[test]
    fn test_language_for_extension_edge_case() {
        assert_eq!(language_for_extension("unknown"), None);
        assert_eq!(language_for_extension(""), None);
        #[cfg(feature = "lang-rust")]
        assert_eq!(language_for_extension("RS"), Some("rust"));
        #[cfg(feature = "lang-rust")]
        assert_eq!(language_for_extension("Rs"), Some("rust"));
        // Uppercase Fortran extensions resolved via eq_ignore_ascii_case
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("F90"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("FOR"), Some("fortran"));
    }

    /// Keys must be unique and lowercase: lookups return the first match, so a
    /// duplicate key would silently shadow the later row.
    #[test]
    fn test_extension_map_has_unique_lowercase_keys() {
        for (idx, (ext, _)) in EXTENSION_MAP.iter().enumerate() {
            assert!(
                ext.chars()
                    .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit()),
                "extension {:?} is not lowercase alphanumeric",
                ext
            );
            assert!(
                EXTENSION_MAP[..idx].iter().all(|(seen, _)| seen != ext),
                "extension {:?} is listed more than once",
                ext
            );
        }
    }

    /// The extension table and the advertised language list are gated on the same
    /// features, so under any feature combination they must describe the same set.
    #[test]
    fn test_supported_languages_matches_extension_map() {
        let languages = supported_languages();
        for (ext, lang) in EXTENSION_MAP {
            assert!(
                languages.contains(lang),
                "extension {:?} maps to {:?}, which supported_languages() omits",
                ext,
                lang
            );
        }
        for lang in languages {
            assert!(
                EXTENSION_MAP.iter().any(|(_, mapped)| mapped == lang),
                "language {:?} is advertised but has no extension mapping",
                lang
            );
        }
    }
}