// Source: aptu_coder_core/lang.rs

// SPDX-FileCopyrightText: 2026 aptu-coder contributors
// SPDX-License-Identifier: Apache-2.0
//! Language detection by file extension.
//!
//! Maps file extensions to supported language identifiers.

/// One row of the extension table: `(file extension, language identifier)`.
type ExtensionEntry = (&'static str, &'static str);

/// Table of known file extensions and the language identifier each one maps to.
///
/// Extensions are stored in lower case and without a leading dot. Every row is
/// gated on the Cargo feature of its language, so the table only contains the
/// languages compiled into the current build (the `c` row shares the
/// `lang-cpp` feature with the C++ rows). Rows are kept in declaration order;
/// anything that iterates the table observes exactly this order.
const EXTENSION_MAP: &[ExtensionEntry] = &[
    #[cfg(feature = "lang-cpp")]
    ("c", "c"),
    #[cfg(feature = "lang-cpp")]
    ("cc", "cpp"),
    #[cfg(feature = "lang-javascript")]
    ("cjs", "javascript"),
    #[cfg(feature = "lang-cpp")]
    ("cpp", "cpp"),
    #[cfg(feature = "lang-cpp")]
    ("cxx", "cpp"),
    #[cfg(feature = "lang-fortran")]
    ("f", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f03", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f08", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f77", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f90", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("f95", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("for", "fortran"),
    #[cfg(feature = "lang-fortran")]
    ("ftn", "fortran"),
    // Headers with a plain `.h` suffix are treated as C++.
    #[cfg(feature = "lang-cpp")]
    ("h", "cpp"),
    #[cfg(feature = "lang-csharp")]
    ("cs", "csharp"),
    #[cfg(feature = "lang-cpp")]
    ("hpp", "cpp"),
    #[cfg(feature = "lang-cpp")]
    ("hxx", "cpp"),
    #[cfg(feature = "lang-javascript")]
    ("js", "javascript"),
    #[cfg(feature = "lang-javascript")]
    ("mjs", "javascript"),
    #[cfg(feature = "lang-go")]
    ("go", "go"),
    #[cfg(feature = "lang-java")]
    ("java", "java"),
    #[cfg(feature = "lang-kotlin")]
    ("kt", "kotlin"),
    #[cfg(feature = "lang-kotlin")]
    ("kts", "kotlin"),
    #[cfg(feature = "lang-python")]
    ("py", "python"),
    #[cfg(feature = "lang-rust")]
    ("rs", "rust"),
    #[cfg(feature = "lang-typescript")]
    ("ts", "typescript"),
    #[cfg(feature = "lang-tsx")]
    ("tsx", "tsx"),
];

64/// Returns the language identifier for the given file extension, or `None` if unsupported.
65///
66/// The lookup is case-insensitive. Supported extensions include `rs`, `py`, `go`, `java`,
67/// `ts`, `tsx`, `js`, `mjs`, `cjs`, `c`, `cc`, `cpp`, `cxx`, `h`, `hpp`, `hxx`, `cs`,
68/// and Fortran variants `f`, `f77`, `f90`, `f95`, `f03`, `f08`, `for`, `ftn`.
69#[must_use]
70pub fn language_for_extension(ext: &str) -> Option<&'static str> {
71    EXTENSION_MAP
72        .iter()
73        .find(|(e, _)| e.eq_ignore_ascii_case(ext))
74        .map(|(_, lang)| *lang)
75}
76
77/// Returns all file extensions supported by the compiled feature set.
78///
79/// Each entry corresponds to one row in `EXTENSION_MAP`. The list is used to
80/// build human-readable error messages without duplicating the extension list.
81#[must_use]
82pub fn supported_extensions() -> Vec<&'static str> {
83    EXTENSION_MAP.iter().map(|(ext, _)| *ext).collect()
84}
85
/// Lists the language identifiers compiled into this build.
///
/// Each name (`"rust"`, `"python"`, `"go"`, ...) is present only when the Cargo
/// feature of its language is enabled; `"c"` and `"cpp"` are both tied to the
/// `lang-cpp` feature. The slice is empty when no language feature is enabled.
#[must_use]
pub fn supported_languages() -> &'static [&'static str] {
    // Named constant so the feature-gated list reads as data, separate from the return.
    const LANGUAGES: &[&str] = &[
        #[cfg(feature = "lang-rust")]
        "rust",
        #[cfg(feature = "lang-go")]
        "go",
        #[cfg(feature = "lang-java")]
        "java",
        #[cfg(feature = "lang-kotlin")]
        "kotlin",
        #[cfg(feature = "lang-python")]
        "python",
        #[cfg(feature = "lang-typescript")]
        "typescript",
        #[cfg(feature = "lang-tsx")]
        "tsx",
        #[cfg(feature = "lang-javascript")]
        "javascript",
        #[cfg(feature = "lang-fortran")]
        "fortran",
        #[cfg(feature = "lang-cpp")]
        "c",
        #[cfg(feature = "lang-cpp")]
        "cpp",
        #[cfg(feature = "lang-csharp")]
        "csharp",
    ];
    LANGUAGES
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Reports whether `ext` is a complete member of an alternation in `pattern`.
    ///
    /// A plain substring test is too weak: `"c|"` is found inside `"cc|"`,
    /// `"js|"` inside `"cjs|"`, and `"ts|"` inside `"kts|"`, so a missing short
    /// extension could go unnoticed. An occurrence only counts when it is not
    /// preceded by an ASCII letter or digit and is immediately followed by `|`
    /// or `)`.
    fn pattern_lists_extension(pattern: &str, ext: &str) -> bool {
        pattern.match_indices(ext).any(|(start, found)| {
            let preceded_by_word_char =
                pattern[..start].ends_with(|c: char| c.is_ascii_alphanumeric());
            let closes_member = pattern[start + found.len()..].starts_with(['|', ')']);
            !preceded_by_word_char && closes_member
        })
    }

    #[test]
    fn test_language_for_extension_happy_path() {
        #[cfg(feature = "lang-rust")]
        assert_eq!(language_for_extension("rs"), Some("rust"));
        #[cfg(feature = "lang-python")]
        assert_eq!(language_for_extension("py"), Some("python"));
        #[cfg(feature = "lang-go")]
        assert_eq!(language_for_extension("go"), Some("go"));
        #[cfg(feature = "lang-java")]
        assert_eq!(language_for_extension("java"), Some("java"));
        #[cfg(feature = "lang-typescript")]
        assert_eq!(language_for_extension("ts"), Some("typescript"));
        #[cfg(feature = "lang-tsx")]
        assert_eq!(language_for_extension("tsx"), Some("tsx"));
        #[cfg(feature = "lang-javascript")]
        assert_eq!(language_for_extension("js"), Some("javascript"));
        #[cfg(feature = "lang-javascript")]
        assert_eq!(language_for_extension("mjs"), Some("javascript"));
        #[cfg(feature = "lang-javascript")]
        assert_eq!(language_for_extension("cjs"), Some("javascript"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("f"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("f77"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("f90"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("f95"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("f03"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("f08"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("for"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("ftn"), Some("fortran"));
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("c"), Some("c"));
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("cpp"), Some("cpp"));
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("cxx"), Some("cpp"));
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("h"), Some("cpp"));
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("hpp"), Some("cpp"));
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("hxx"), Some("cpp"));
        #[cfg(feature = "lang-cpp")]
        assert_eq!(language_for_extension("cc"), Some("cpp"));
        #[cfg(feature = "lang-csharp")]
        assert_eq!(language_for_extension("cs"), Some("csharp"));
        #[cfg(feature = "lang-kotlin")]
        assert_eq!(language_for_extension("kt"), Some("kotlin"));
        #[cfg(feature = "lang-kotlin")]
        assert_eq!(language_for_extension("kts"), Some("kotlin"));
    }

    /// Pins the whole-member rule of `pattern_lists_extension` on synthetic
    /// patterns, independent of any enabled feature.
    #[test]
    fn test_pattern_lists_extension_requires_whole_member() {
        let pattern = r"\.(cc|cjs|kts)$";
        assert!(pattern_lists_extension(pattern, "cc"));
        assert!(pattern_lists_extension(pattern, "cjs"));
        assert!(pattern_lists_extension(pattern, "kts"));
        // Suffixes of longer members must not be accepted.
        assert!(!pattern_lists_extension(pattern, "c"));
        assert!(!pattern_lists_extension(pattern, "js"));
        assert!(!pattern_lists_extension(pattern, "ts"));

        // Members directly after a group opener, including `(?:`, still count.
        let non_capturing = r"\.(?:c|js)$";
        assert!(pattern_lists_extension(non_capturing, "c"));
        assert!(pattern_lists_extension(non_capturing, "js"));
        assert!(!pattern_lists_extension(non_capturing, "s"));
    }

    /// Asserts every extension in `EXTENSION_MAP` appears as an alternation member in
    /// `SUPPORTED_FILE_EXT_PATTERN`, preventing drift when a new language is added.
    /// The pattern has the form `...(ext1|ext2|...)...`; each extension must show up
    /// as a whole member (see `pattern_lists_extension`), not merely as the tail of
    /// a longer one. The check only runs when the `schemars` feature is enabled.
    #[test]
    fn test_supported_file_ext_pattern_covers_all_extension_map_entries() {
        #[cfg(feature = "schemars")]
        for (ext, _lang) in EXTENSION_MAP {
            assert!(
                pattern_lists_extension(crate::schema_helpers::SUPPORTED_FILE_EXT_PATTERN, ext),
                "SUPPORTED_FILE_EXT_PATTERN is missing extension '{ext}' from EXTENSION_MAP; \
                 add it to schema_helpers.rs"
            );
        }
    }

    /// Guards against `supported_languages` and `EXTENSION_MAP` drifting apart:
    /// every mapped language must be listed, and every listed language must have
    /// at least one extension.
    #[test]
    fn test_supported_languages_and_extension_map_agree() {
        let languages = supported_languages();
        for (ext, lang) in EXTENSION_MAP {
            assert!(
                languages.contains(lang),
                "language '{lang}' (extension '{ext}') is missing from supported_languages()"
            );
        }
        for lang in languages {
            assert!(
                EXTENSION_MAP.iter().any(|(_, mapped)| mapped == lang),
                "language '{lang}' has no extension in EXTENSION_MAP"
            );
        }
    }

    /// Every extension reported as supported must resolve to a language.
    #[test]
    fn test_supported_extensions_mirrors_extension_map() {
        let extensions = supported_extensions();
        assert_eq!(extensions.len(), EXTENSION_MAP.len());
        for ext in extensions {
            assert!(
                language_for_extension(ext).is_some(),
                "extension '{ext}' is listed as supported but does not resolve"
            );
        }
    }

    #[test]
    fn test_language_for_extension_edge_case() {
        assert_eq!(language_for_extension("unknown"), None);
        assert_eq!(language_for_extension(""), None);
        #[cfg(feature = "lang-rust")]
        assert_eq!(language_for_extension("RS"), Some("rust"));
        // Uppercase Fortran extensions resolved via eq_ignore_ascii_case
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("F90"), Some("fortran"));
        #[cfg(feature = "lang-fortran")]
        assert_eq!(language_for_extension("FOR"), Some("fortran"));
    }
}