Skip to main content

krait/detect/
language.rs

1use std::path::Path;
2
/// Programming languages the detector knows about.
///
/// This is a payload-free tag: it is `Copy`, comparable, and hashable, so it can be
/// passed by value and used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// Rust.
    Rust,
    /// TypeScript.
    TypeScript,
    /// JavaScript.
    JavaScript,
    /// Go.
    Go,
    /// C and C++ (both are represented by this single variant).
    Cpp,
}
11
12impl Language {
13    /// Human-readable name for display.
14    #[must_use]
15    pub fn name(self) -> &'static str {
16        match self {
17            Self::Rust => "rust",
18            Self::TypeScript => "typescript",
19            Self::JavaScript => "javascript",
20            Self::Go => "go",
21            Self::Cpp => "c++",
22        }
23    }
24}
25
26impl Language {
27    /// File extensions associated with this language.
28    #[must_use]
29    pub fn extensions(self) -> &'static [&'static str] {
30        match self {
31            Self::Rust => &["rs"],
32            Self::TypeScript => &["ts", "tsx"],
33            Self::JavaScript => &["js", "jsx", "mjs", "cjs"],
34            Self::Go => &["go"],
35            Self::Cpp => &["c", "cpp", "cc", "cxx", "h", "hpp", "hxx"],
36        }
37    }
38
39    /// Workspace marker files that indicate this language's project root.
40    /// Used by `find_package_roots()` for monorepo workspace detection.
41    #[must_use]
42    pub fn workspace_markers(self) -> &'static [&'static str] {
43        match self {
44            Self::Rust => &["Cargo.toml"],
45            Self::TypeScript => &["tsconfig.json"],
46            Self::JavaScript => &["package.json"],
47            Self::Go => &["go.mod"],
48            Self::Cpp => &["CMakeLists.txt", "compile_commands.json"],
49        }
50    }
51
52    /// All language variants.
53    pub const ALL: &'static [Language] = &[
54        Language::Rust,
55        Language::TypeScript,
56        Language::JavaScript,
57        Language::Go,
58        Language::Cpp,
59    ];
60}
61
62/// Determine the language for a file based on its extension.
63/// Delegates to `Language::extensions()` — single source of truth.
64#[must_use]
65pub fn language_for_file(path: &Path) -> Option<Language> {
66    let ext = path.extension()?.to_str()?;
67    Language::ALL
68        .iter()
69        .copied()
70        .find(|lang| lang.extensions().contains(&ext))
71}
72
73impl std::fmt::Display for Language {
74    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
75        f.write_str(self.name())
76    }
77}
78
/// Directory names, relative to the project root, that conventionally hold the
/// individual packages of a JS/TS monorepo (e.g. `packages/api`, `apps/web`).
/// `src` is included for projects that nest their packages under `src/`.
const MONOREPO_DIRS: &[&str] = &["packages", "apps", "libs", "src"];
81
82/// Detect languages used in a project by scanning for config files.
83/// Marker file names come from `Language::workspace_markers()` — single source of truth.
84/// Returns languages in priority order.
85#[must_use]
86pub fn detect_languages(root: &Path) -> Vec<Language> {
87    let mut languages = Vec::new();
88
89    if Language::Rust
90        .workspace_markers()
91        .iter()
92        .any(|m| root.join(m).exists())
93    {
94        languages.push(Language::Rust);
95    }
96
97    // TypeScript and JavaScript share package.json; tsconfig.json or .ts files disambiguate.
98    let has_tsconfig = Language::TypeScript
99        .workspace_markers()
100        .iter()
101        .any(|m| root.join(m).exists());
102    let has_package_json = Language::JavaScript
103        .workspace_markers()
104        .iter()
105        .any(|m| root.join(m).exists());
106
107    if has_tsconfig || has_ts_files(root) {
108        languages.push(Language::TypeScript);
109    } else if has_package_json {
110        languages.push(Language::JavaScript);
111    }
112
113    if Language::Go
114        .workspace_markers()
115        .iter()
116        .any(|m| root.join(m).exists())
117    {
118        languages.push(Language::Go);
119    }
120
121    if Language::Cpp
122        .workspace_markers()
123        .iter()
124        .any(|m| root.join(m).exists())
125        || has_c_files(root)
126    {
127        languages.push(Language::Cpp);
128    }
129
130    languages
131}
132
133fn has_ts_files(root: &Path) -> bool {
134    let mut dirs = Vec::new();
135    let src = root.join("src");
136    if src.is_dir() {
137        dirs.push(src);
138    }
139    dirs.push(root.to_path_buf());
140
141    // Monorepo: scan well-known subdirectory conventions for tsconfig or .ts files
142    for &pkg_dir in MONOREPO_DIRS {
143        let pd = root.join(pkg_dir);
144        if let Ok(entries) = std::fs::read_dir(&pd) {
145            for entry in entries.filter_map(Result::ok) {
146                let pkg = entry.path();
147                if pkg.is_dir() {
148                    // tsconfig.json in a package is a strong signal
149                    if Language::TypeScript
150                        .workspace_markers()
151                        .iter()
152                        .any(|m| pkg.join(m).exists())
153                    {
154                        return true;
155                    }
156                    let pkg_src = pkg.join("src");
157                    if pkg_src.is_dir() {
158                        dirs.push(pkg_src);
159                    }
160                }
161            }
162        }
163    }
164
165    let ts_exts = Language::TypeScript.extensions();
166    for dir in &dirs {
167        let Ok(entries) = std::fs::read_dir(dir) else {
168            continue;
169        };
170        if entries.filter_map(Result::ok).any(|e| {
171            e.path()
172                .extension()
173                .and_then(|x| x.to_str())
174                .is_some_and(|x| ts_exts.contains(&x))
175        }) {
176            return true;
177        }
178    }
179    false
180}
181
/// Returns true if the project root (or its `src/` subdirectory) directly contains a
/// C/C++ source file.
///
/// Handles Makefile-based and legacy C/C++ projects that lack `CMakeLists.txt` or
/// `compile_commands.json`. Only regular files with a source extension count; headers
/// and directories whose names merely end in such an extension do not. The scan is
/// shallow (no recursion), and directories that are missing or unreadable are skipped.
fn has_c_files(root: &Path) -> bool {
    // C source extensions (not headers — headers alone don't indicate a buildable project)
    const C_SRC_EXTS: &[&str] = &["c", "cpp", "cc", "cxx"];

    /// True when `dir` directly contains a regular file with a C/C++ source extension.
    fn holds_c_source(dir: &Path) -> bool {
        let Ok(entries) = std::fs::read_dir(dir) else {
            return false;
        };
        entries.filter_map(Result::ok).any(|entry| {
            let path = entry.path();
            let ext_matches = path
                .extension()
                .and_then(|ext| ext.to_str())
                .is_some_and(|ext| C_SRC_EXTS.contains(&ext));
            // A directory such as `vendor.c/` is not a source file. The file-type check
            // runs only for entries whose name already matched.
            ext_matches && path.is_file()
        })
    }

    // `read_dir` fails for a missing or non-directory `src`, which reads as "no sources".
    holds_c_source(root) || holds_c_source(&root.join("src"))
}
210
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a throwaway project directory containing `files`, given as
    /// `(path relative to the project root, file contents)` pairs. Parent directories
    /// are created as needed. The directory is removed when the returned guard drops.
    fn project(files: &[(&str, &str)]) -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        for &(rel, contents) in files {
            let path = dir.path().join(rel);
            if let Some(parent) = path.parent() {
                std::fs::create_dir_all(parent).unwrap();
            }
            std::fs::write(path, contents).unwrap();
        }
        dir
    }

    // --- detect_languages: marker files -------------------------------------------

    #[test]
    fn detects_rust_project() {
        let dir = project(&[("Cargo.toml", "")]);
        assert_eq!(detect_languages(dir.path()), vec![Language::Rust]);
    }

    #[test]
    fn detects_typescript_project() {
        let dir = project(&[("tsconfig.json", "{}")]);
        assert_eq!(detect_languages(dir.path()), vec![Language::TypeScript]);
    }

    #[test]
    fn detects_typescript_from_package_json_with_ts_files() {
        let dir = project(&[("package.json", "{}"), ("src/index.ts", "")]);
        assert_eq!(detect_languages(dir.path()), vec![Language::TypeScript]);
    }

    #[test]
    fn detects_typescript_monorepo_with_packages() {
        let dir = project(&[
            ("package.json", "{}"),
            ("packages/api/tsconfig.json", "{}"),
        ]);
        assert_eq!(detect_languages(dir.path()), vec![Language::TypeScript]);
    }

    #[test]
    fn detects_typescript_nested_under_src() {
        // Projects like `meet` where TS packages live under src/frontend, src/sdk/...
        let dir = project(&[("src/frontend/tsconfig.json", "{}")]);
        assert_eq!(detect_languages(dir.path()), vec![Language::TypeScript]);
    }

    #[test]
    fn detects_javascript_from_package_json_without_ts() {
        let dir = project(&[("package.json", "{}")]);
        assert_eq!(detect_languages(dir.path()), vec![Language::JavaScript]);
    }

    #[test]
    fn detects_go_project() {
        let dir = project(&[("go.mod", "")]);
        assert_eq!(detect_languages(dir.path()), vec![Language::Go]);
    }

    #[test]
    fn detects_polyglot() {
        let dir = project(&[("Cargo.toml", ""), ("package.json", "{}")]);
        assert_eq!(
            detect_languages(dir.path()),
            vec![Language::Rust, Language::JavaScript]
        );
    }

    #[test]
    fn empty_project_returns_empty() {
        let dir = project(&[]);
        assert!(detect_languages(dir.path()).is_empty());
    }

    // --- detect_languages: C/C++ ----------------------------------------------------

    #[test]
    fn detects_cpp_from_cmake() {
        let dir = project(&[("CMakeLists.txt", "")]);
        assert_eq!(detect_languages(dir.path()), vec![Language::Cpp]);
    }

    #[test]
    fn detects_c_project_from_root_c_files() {
        let dir = project(&[("main.c", "int main() {}")]);
        assert_eq!(detect_languages(dir.path()), vec![Language::Cpp]);
    }

    #[test]
    fn detects_c_project_from_src_c_files() {
        let dir = project(&[("src/app.c", "")]);
        assert_eq!(detect_languages(dir.path()), vec![Language::Cpp]);
    }

    #[test]
    fn detects_cpp_project_from_src_cpp_files() {
        let dir = project(&[("src/main.cpp", "")]);
        assert_eq!(detect_languages(dir.path()), vec![Language::Cpp]);
    }

    #[test]
    fn headers_only_not_detected_as_c() {
        // .h files alone shouldn't trigger C detection (could be headers for another language)
        let dir = project(&[("config.h", "")]);
        assert!(detect_languages(dir.path()).is_empty());
    }

    // --- language_for_file and the lookup tables --------------------------------------

    #[test]
    fn language_for_file_maps_known_extensions() {
        assert_eq!(
            language_for_file(Path::new("src/main.rs")),
            Some(Language::Rust)
        );
        assert_eq!(
            language_for_file(Path::new("web/view.tsx")),
            Some(Language::TypeScript)
        );
        assert_eq!(
            language_for_file(Path::new("tool.mjs")),
            Some(Language::JavaScript)
        );
        assert_eq!(
            language_for_file(Path::new("cmd/app.go")),
            Some(Language::Go)
        );
        assert_eq!(
            language_for_file(Path::new("include/config.h")),
            Some(Language::Cpp)
        );
    }

    #[test]
    fn language_for_file_rejects_unknown_or_missing_extension() {
        assert_eq!(language_for_file(Path::new("Makefile")), None);
        assert_eq!(language_for_file(Path::new("notes.txt")), None);
        // A leading dot makes a hidden file, not an extension.
        assert_eq!(language_for_file(Path::new(".rs")), None);
    }

    #[test]
    fn every_extension_maps_back_to_its_language() {
        // Guards against two languages claiming the same extension and against a
        // variant missing from `Language::ALL`.
        for &lang in Language::ALL {
            assert!(!lang.extensions().is_empty());
            assert!(!lang.workspace_markers().is_empty());
            for ext in lang.extensions() {
                let file = format!("file.{ext}");
                assert_eq!(language_for_file(Path::new(&file)), Some(lang));
            }
        }
    }

    #[test]
    fn all_lists_each_variant_once() {
        let unique: std::collections::HashSet<Language> =
            Language::ALL.iter().copied().collect();
        assert_eq!(unique.len(), Language::ALL.len());
        assert_eq!(Language::ALL.len(), 5);
    }

    #[test]
    fn display_matches_name() {
        for &lang in Language::ALL {
            assert_eq!(lang.to_string(), lang.name());
        }
    }
}
350}