Skip to main content

aptu_coder_core/
lang.rs

// SPDX-FileCopyrightText: 2026 aptu-coder contributors
// SPDX-License-Identifier: Apache-2.0
//! Language detection by file extension.
//!
//! Maps file extensions to supported language identifiers.
6
/// Lookup table pairing a bare file extension (lowercase, no leading dot) with
/// the language identifier it resolves to.
///
/// Rows carrying a `#[cfg(feature = "lang-…")]` attribute are compiled in only
/// when that cargo feature is enabled; the unguarded rows at the end are always
/// present. Row order is observable: `supported_extensions` reports extensions
/// in exactly this order, so new rows should be added without reshuffling.
const EXTENSION_MAP: &[(&str, &str)] = &[
    #[cfg(feature = "lang-cpp")]
    ("c", "c"),
    #[cfg(feature = "lang-cpp")]
    ("cc", "cpp"),
    #[cfg(feature = "lang-javascript")]
    ("cjs", "javascript"),
    #[cfg(feature = "lang-cpp")]
    ("cpp", "cpp"),
    #[cfg(feature = "lang-cpp")]
    ("cxx", "cpp"),
    #[cfg(feature = "lang-fortran")]
    ("f", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f03", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f08", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f77", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f90", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f95", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("for", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("ftn", "fortran"),
    // Plain `.h` headers are classified as C++ rather than C.
    #[cfg(feature = "lang-cpp")]
    ("h", "cpp"),
    #[cfg(feature = "lang-csharp")]
    ("cs", "csharp"),
    #[cfg(feature = "lang-cpp")]
    ("hpp", "cpp"),
    #[cfg(feature = "lang-cpp")]
    ("hxx", "cpp"),
    #[cfg(feature = "lang-javascript")]
    ("js", "javascript"),
    #[cfg(feature = "lang-javascript")]
    ("mjs", "javascript"),
    #[cfg(feature = "lang-go")]
    ("go", "go"),
    #[cfg(feature = "lang-java")]
    ("java", "java"),
    #[cfg(feature = "lang-kotlin")]
    ("kt", "kotlin"),
    #[cfg(feature = "lang-kotlin")]
    ("kts", "kotlin"),
    #[cfg(feature = "lang-python")]
    ("py", "python"),
    #[cfg(feature = "lang-rust")]
    ("rs", "rust"),
    #[cfg(feature = "lang-typescript")]
    ("ts", "typescript"),
    #[cfg(feature = "lang-tsx")]
    ("tsx", "tsx"),
    #[cfg(feature = "lang-html")]
    ("html", "html"),
    #[cfg(feature = "lang-html")]
    ("htm", "html"),
    #[cfg(feature = "lang-markdown")]
    ("md", "markdown"),
    #[cfg(feature = "lang-markdown")]
    ("mdx", "markdown"),
    // Always compiled in: the rows below have no feature gate.
    ("astro", "astro"),
    ("css", "css"),
    ("yaml", "yaml"),
    ("yml", "yaml"),
    ("json", "json"),
    ("toml", "toml"),
];
77
78/// Returns the language identifier for the given file extension, or `None` if unsupported.
79///
80/// The lookup is case-insensitive. Supported extensions include `rs`, `py`, `go`, `java`,
81/// `ts`, `tsx`, `js`, `mjs`, `cjs`, `c`, `cc`, `cpp`, `cxx`, `h`, `hpp`, `hxx`, `cs`,
82/// Fortran variants `f`, `f77`, `f90`, `f95`, `f03`, `f08`, `for`, `ftn`,
83/// HTML variants `html`, `htm`, and Markdown variants `md`, `mdx`.
84#[must_use]
85pub fn language_for_extension(ext: &str) -> Option<&'static str> {
86    EXTENSION_MAP
87        .iter()
88        .find(|(e, _)| e.eq_ignore_ascii_case(ext))
89        .map(|(_, lang)| *lang)
90}
91
92/// Returns all file extensions supported by the compiled feature set.
93///
94/// Each entry corresponds to one row in `EXTENSION_MAP`. The list is used to
95/// build human-readable error messages without duplicating the extension list.
96#[must_use]
97pub fn supported_extensions() -> Vec<&'static str> {
98    EXTENSION_MAP.iter().map(|(ext, _)| *ext).collect()
99}
100
/// Lists the language identifiers whose cargo feature was enabled at compile time.
///
/// Each element (for example `"rust"`, `"python"`, `"go"`) is included only when
/// its `lang-…` feature is active; with no language feature enabled the slice is
/// empty. Both `"c"` and `"cpp"` are controlled by the single `lang-cpp` feature.
///
/// NOTE(review): the identifiers that `EXTENSION_MAP` maps without a feature
/// gate (`astro`, `css`, `yaml`, `json`, `toml`) are not listed here — confirm
/// that omission is intentional.
#[must_use]
pub fn supported_languages() -> &'static [&'static str] {
    // Hoisted into a named constant so the feature-gated list reads as data.
    const LANGUAGES: &[&str] = &[
        #[cfg(feature = "lang-rust")]
        "rust",
        #[cfg(feature = "lang-go")]
        "go",
        #[cfg(feature = "lang-java")]
        "java",
        #[cfg(feature = "lang-kotlin")]
        "kotlin",
        #[cfg(feature = "lang-python")]
        "python",
        #[cfg(feature = "lang-typescript")]
        "typescript",
        #[cfg(feature = "lang-tsx")]
        "tsx",
        #[cfg(feature = "lang-javascript")]
        "javascript",
        #[cfg(feature = "lang-fortran")]
        "fortran",
        #[cfg(feature = "lang-cpp")]
        "c",
        #[cfg(feature = "lang-cpp")]
        "cpp",
        #[cfg(feature = "lang-csharp")]
        "csharp",
        #[cfg(feature = "lang-html")]
        "html",
        #[cfg(feature = "lang-markdown")]
        "markdown",
    ];
    LANGUAGES
}
138
#[cfg(test)]
mod tests {
    use super::*;

    /// Reports whether `ext` occurs in `pattern` as a complete regex alternative.
    ///
    /// An occurrence counts only when it is immediately followed by `|` or `)`
    /// and is NOT immediately preceded by an ASCII letter or digit. The second
    /// condition is what a plain substring search lacks: without it `js` would be
    /// considered present because of `cjs|`, `ts` because of `kts|`, and `c`
    /// because of `cc|`.
    fn pattern_lists_extension(pattern: &str, ext: &str) -> bool {
        pattern.match_indices(ext).any(|(start, hit)| {
            let preceded_by_word_char = matches!(
                pattern[..start].chars().next_back(),
                Some(c) if c.is_ascii_alphanumeric()
            );
            let closes_alternative = matches!(
                pattern[start + hit.len()..].chars().next(),
                Some('|' | ')')
            );
            !preceded_by_word_char && closes_alternative
        })
    }

    /// Every feature-gated and ungated extension resolves to its identifier.
    #[test]
    fn test_language_for_extension_happy_path() {
        #[cfg(feature = "lang-rust")]
        assert_eq!(language_for_extension("rs"), Some("rust"));
        #[cfg(feature = "lang-python")]
        assert_eq!(language_for_extension("py"), Some("python"));
        #[cfg(feature = "lang-go")]
        assert_eq!(language_for_extension("go"), Some("go"));
        #[cfg(feature = "lang-java")]
        assert_eq!(language_for_extension("java"), Some("java"));
        #[cfg(feature = "lang-typescript")]
        assert_eq!(language_for_extension("ts"), Some("typescript"));
        #[cfg(feature = "lang-tsx")]
        assert_eq!(language_for_extension("tsx"), Some("tsx"));
        #[cfg(feature = "lang-javascript")]
        for ext in ["js", "mjs", "cjs"] {
            assert_eq!(language_for_extension(ext), Some("javascript"));
        }
        #[cfg(feature = "lang-fortran")]
        for ext in ["f", "f03", "f08", "f77", "f90", "f95", "for", "ftn"] {
            assert_eq!(language_for_extension(ext), Some("fortran"));
        }
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("c"), Some("c"));
        #[cfg(feature = "lang-cpp")]
        for ext in ["cpp", "cc", "cxx", "h", "hpp", "hxx"] {
            assert_eq!(language_for_extension(ext), Some("cpp"));
        }
        #[cfg(feature = "lang-csharp")]
        assert_eq!(language_for_extension("cs"), Some("csharp"));
        #[cfg(feature = "lang-kotlin")]
        assert_eq!(language_for_extension("kt"), Some("kotlin"));
        #[cfg(feature = "lang-kotlin")]
        assert_eq!(language_for_extension("kts"), Some("kotlin"));
        #[cfg(feature = "lang-html")]
        for ext in ["html", "htm"] {
            assert_eq!(language_for_extension(ext), Some("html"));
        }
        #[cfg(feature = "lang-markdown")]
        for ext in ["md", "mdx"] {
            assert_eq!(language_for_extension(ext), Some("markdown"));
        }

        // Rows without a feature gate must resolve in every build.
        assert_eq!(language_for_extension("astro"), Some("astro"));
        assert_eq!(language_for_extension("css"), Some("css"));
        assert_eq!(language_for_extension("yaml"), Some("yaml"));
        assert_eq!(language_for_extension("yml"), Some("yaml"));
        assert_eq!(language_for_extension("json"), Some("json"));
        assert_eq!(language_for_extension("toml"), Some("toml"));
    }

    /// Pins the boundary rule of `pattern_lists_extension` on a synthetic
    /// pattern: whole alternatives are found, suffixes of longer alternatives
    /// are not.
    #[test]
    fn test_pattern_lists_extension_rejects_suffix_matches() {
        let pattern = r"^.*\.(cc|cjs|kts|rs)$";
        assert!(pattern_lists_extension(pattern, "cc"));
        assert!(pattern_lists_extension(pattern, "cjs"));
        assert!(pattern_lists_extension(pattern, "kts"));
        // Last alternative is terminated by `)` rather than `|`.
        assert!(pattern_lists_extension(pattern, "rs"));
        // Only present as the tail of a longer alternative.
        assert!(!pattern_lists_extension(pattern, "c"));
        assert!(!pattern_lists_extension(pattern, "js"));
        assert!(!pattern_lists_extension(pattern, "ts"));
        // Not present at all.
        assert!(!pattern_lists_extension(pattern, "py"));
    }

    /// Asserts every extension in `EXTENSION_MAP` appears as an alternation in
    /// `SUPPORTED_FILE_EXT_PATTERN`, preventing drift when a new language is added.
    /// The pattern has the form `...(ext1|ext2|...)...`, so each extension must
    /// appear as a whole alternative: followed by `|` or `)` and not glued to a
    /// preceding letter or digit. The check only runs with the `schemars` feature.
    #[test]
    fn test_supported_file_ext_pattern_covers_all_extension_map_entries() {
        #[cfg(feature = "schemars")]
        for (ext, _lang) in EXTENSION_MAP {
            assert!(
                pattern_lists_extension(crate::schema_helpers::SUPPORTED_FILE_EXT_PATTERN, ext),
                "SUPPORTED_FILE_EXT_PATTERN is missing extension '{ext}' from EXTENSION_MAP; \
                 add it to schema_helpers.rs"
            );
        }
    }

    /// Unknown and empty inputs yield `None`; lookups ignore ASCII case.
    #[test]
    fn test_language_for_extension_edge_case() {
        assert_eq!(language_for_extension("unknown"), None);
        assert_eq!(language_for_extension(""), None);
        #[cfg(feature = "lang-rust")]
        assert_eq!(language_for_extension("RS"), Some("rust"));
        // Uppercase Fortran extensions resolved via eq_ignore_ascii_case
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("F90"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("FOR"), Some("fortran"));
    }
}