Skip to main content

equilibrium_ffi/
detector.rs

1//! Language detection for source files.
2
3use std::path::Path;
4
/// Supported languages that can be compiled to C.
///
/// This is a plain, field-less tag: it is `Copy`, comparable with `==`, and
/// hashable, so it can be used directly as a `HashMap`/`HashSet` key.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Language {
    /// V language (vlang.io)
    V,
    /// Zig language
    Zig,
    /// C (already native)
    C,
    /// C++
    Cpp,
    /// C#
    CSharp,
    /// Rust (for cbindgen)
    Rust,
    /// D language
    D,
    /// Nim language
    Nim,
    /// Odin language
    Odin,
    /// Hare language
    Hare,
}
29
30/// Information about a detected language.
31#[derive(Clone, Debug)]
32pub struct LanguageInfo {
33    pub language: Language,
34    pub compiler: Option<String>,
35    pub version: Option<String>,
36}
37
38impl Language {
39    pub fn cli_name(&self) -> &'static str {
40        match self {
41            Language::V => "v",
42            Language::Zig => "zig",
43            Language::C => "c",
44            Language::Cpp => "cpp",
45            Language::CSharp => "csharp",
46            Language::Rust => "rust",
47            Language::D => "d",
48            Language::Nim => "nim",
49            Language::Odin => "odin",
50            Language::Hare => "hare",
51        }
52    }
53
54    pub fn from_cli_name(name: &str) -> Option<Self> {
55        match name.to_ascii_lowercase().as_str() {
56            "v" | "vlang" => Some(Language::V),
57            "zig" => Some(Language::Zig),
58            "c" => Some(Language::C),
59            "cpp" | "c++" | "cxx" => Some(Language::Cpp),
60            "csharp" | "c#" | "cs" | "dotnet" => Some(Language::CSharp),
61            "rust" | "rs" => Some(Language::Rust),
62            "d" => Some(Language::D),
63            "nim" => Some(Language::Nim),
64            "odin" => Some(Language::Odin),
65            "hare" => Some(Language::Hare),
66            _ => None,
67        }
68    }
69
70    /// Get the file extensions for this language.
71    pub fn extensions(&self) -> &[&str] {
72        match self {
73            Language::V => &["v"],
74            Language::Zig => &["zig"],
75            Language::C => &["c", "h"],
76            Language::Cpp => &["cpp", "cxx", "cc", "hpp", "hxx"],
77            Language::CSharp => &["cs"],
78            Language::Rust => &["rs"],
79            Language::D => &["d", "di"],
80            Language::Nim => &["nim", "nims"],
81            Language::Odin => &["odin"],
82            Language::Hare => &["ha"],
83        }
84    }
85
86    /// Get the typical compiler command for this language.
87    pub fn default_compiler(&self) -> &str {
88        match self {
89            Language::V => "v",
90            Language::Zig => "zig",
91            Language::C => "clang",
92            Language::Cpp => "clang++",
93            Language::CSharp => "csc",
94            Language::Rust => "rustc",
95            Language::D => "ldc2", // or dmd, gdc
96            Language::Nim => "nim",
97            Language::Odin => "odin",
98            Language::Hare => "hare",
99        }
100    }
101
102    /// Get alternative compiler names to try.
103    pub fn alternative_compilers(&self) -> &[&str] {
104        match self {
105            Language::D => &["dmd", "gdc"],
106            Language::C => &["gcc", "cc"],
107            Language::Cpp => &["g++", "c++"],
108            _ => &[],
109        }
110    }
111
112    /// Get the command to compile to C intermediate.
113    pub fn to_c_args(&self, input: &str, output: &str) -> Vec<String> {
114        match self {
115            Language::V => vec![
116                "-o".to_string(),
117                output.to_string(),
118                "-backend".to_string(),
119                "c".to_string(),
120                input.to_string(),
121            ],
122            Language::Zig => {
123                // Zig doesn't have direct C output, but we can use translate-c for headers
124                // For actual code, we emit object files
125                vec![
126                    "build-obj".to_string(),
127                    format!("-femit-bin={output}"),
128                    input.to_string(),
129                ]
130            }
131            Language::C => {
132                // C is already C, just preprocess
133                vec![
134                    "-E".to_string(),
135                    "-o".to_string(),
136                    output.to_string(),
137                    input.to_string(),
138                ]
139            }
140            Language::Cpp => {
141                // Compile to object, we'll need headers separately
142                vec![
143                    "-c".to_string(),
144                    "-o".to_string(),
145                    output.to_string(),
146                    input.to_string(),
147                ]
148            }
149            Language::CSharp => {
150                // C# to native requires AOT compilation
151                vec![
152                    "-target:library".to_string(),
153                    format!("-out:{output}"),
154                    input.to_string(),
155                ]
156            }
157            Language::Rust => {
158                // Rust uses cbindgen for headers + normal compilation
159                vec![
160                    "--crate-type=cdylib".to_string(),
161                    "-o".to_string(),
162                    output.to_string(),
163                    input.to_string(),
164                ]
165            }
166            Language::D => {
167                // D can emit C headers with -HC flag (LDC2)
168                vec![
169                    "-c".to_string(),
170                    "-of".to_string(),
171                    output.to_string(),
172                    "-HC".to_string(), // Generate C header
173                    input.to_string(),
174                ]
175            }
176            Language::Nim => {
177                // Nim compiles to C by default
178                vec![
179                    "c".to_string(),
180                    "--nimcache:.".to_string(),
181                    format!("-o:{output}"),
182                    input.to_string(),
183                ]
184            }
185            Language::Odin => {
186                // Odin compiles to object files
187                vec![
188                    "build".to_string(),
189                    input.to_string(),
190                    "-out:".to_string() + output,
191                    "-build-mode:obj".to_string(),
192                ]
193            }
194            Language::Hare => {
195                // Hare compiles to object files via QBE
196                vec![
197                    "build".to_string(),
198                    "-o".to_string(),
199                    output.to_string(),
200                    input.to_string(),
201                ]
202            }
203        }
204    }
205
206    /// Get all supported languages.
207    pub fn all() -> &'static [Language] {
208        &[
209            Language::V,
210            Language::Zig,
211            Language::C,
212            Language::Cpp,
213            Language::CSharp,
214            Language::Rust,
215            Language::D,
216            Language::Nim,
217            Language::Odin,
218            Language::Hare,
219        ]
220    }
221}
222
223/// Detect the language of a source file based on extension.
224pub fn detect_language(path: &Path) -> Option<Language> {
225    let ext = path.extension()?.to_str()?.to_lowercase();
226
227    for lang in Language::all() {
228        if lang.extensions().contains(&ext.as_str()) {
229            return Some(*lang);
230        }
231    }
232
233    None
234}
235
236/// Check if a compiler is available on the system.
237pub fn find_compiler(language: Language) -> Option<LanguageInfo> {
238    let compiler_name = language.default_compiler();
239
240    // Check primary compiler
241    if which::which(compiler_name).is_ok() {
242        return Some(LanguageInfo {
243            language,
244            compiler: Some(compiler_name.to_string()),
245            version: get_compiler_version(compiler_name),
246        });
247    }
248
249    // Try alternatives
250    for alt in language.alternative_compilers() {
251        if which::which(alt).is_ok() {
252            return Some(LanguageInfo {
253                language,
254                compiler: Some((*alt).to_string()),
255                version: get_compiler_version(alt),
256            });
257        }
258    }
259
260    None
261}
262
/// Ask a compiler for its version and return the first line it prints.
///
/// Most toolchains answer to `--version`, but Zig, Odin and Hare expose the
/// version through a `version` subcommand, so those are queried that way.
///
/// Returns `None` when the process cannot be started, exits unsuccessfully,
/// or writes nothing to stdout.
fn get_compiler_version(compiler: &str) -> Option<String> {
    // Decide on the executable's stem so that a full path or an `.exe`
    // suffix behaves like the bare command name.
    let stem = std::path::Path::new(compiler)
        .file_stem()
        .and_then(|s| s.to_str())
        .unwrap_or(compiler);
    let version_arg = match stem {
        "zig" | "odin" | "hare" => "version",
        _ => "--version",
    };

    let output = std::process::Command::new(compiler)
        .arg(version_arg)
        .output()
        .ok()?;

    if !output.status.success() {
        return None;
    }

    // Only the first stdout line is kept; later lines typically carry
    // target or build details.
    let stdout = String::from_utf8_lossy(&output.stdout);
    let first_line = stdout.lines().next().map(|line| line.to_string());
    first_line
}
277
278/// Scan a directory and detect all source files with their languages.
279pub fn scan_directory(dir: &Path) -> Vec<(std::path::PathBuf, Language)> {
280    let mut results = Vec::new();
281
282    fn visit(dir: &Path, results: &mut Vec<(std::path::PathBuf, Language)>) {
283        if let Ok(entries) = std::fs::read_dir(dir) {
284            for entry in entries.flatten() {
285                let path = entry.path();
286                if path.is_dir() {
287                    // Skip common non-source directories
288                    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
289                    if !matches!(
290                        name,
291                        "target"
292                            | "node_modules"
293                            | ".git"
294                            | "build"
295                            | "dist"
296                            | "zig-cache"
297                            | "nimcache"
298                    ) {
299                        visit(&path, results);
300                    }
301                } else if let Some(lang) = detect_language(&path) {
302                    results.push((path, lang));
303                }
304            }
305        }
306    }
307
308    visit(dir, &mut results);
309    results
310}
311
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Assert that every file name in `names` is detected as `expected`.
    fn assert_detected(names: &[&str], expected: Option<Language>) {
        for name in names {
            assert_eq!(detect_language(Path::new(name)), expected);
        }
    }

    #[test]
    fn test_detect_v() {
        assert_detected(&["mylib.v"], Some(Language::V));
    }

    #[test]
    fn test_detect_zig() {
        assert_detected(&["mylib.zig"], Some(Language::Zig));
    }

    #[test]
    fn test_detect_cpp() {
        assert_detected(&["foo.cpp", "foo.cxx", "foo.cc"], Some(Language::Cpp));
    }

    #[test]
    fn test_detect_d() {
        assert_detected(&["foo.d", "foo.di"], Some(Language::D));
    }

    #[test]
    fn test_detect_nim() {
        assert_detected(&["foo.nim"], Some(Language::Nim));
    }

    #[test]
    fn test_detect_odin() {
        assert_detected(&["foo.odin"], Some(Language::Odin));
    }

    #[test]
    fn test_detect_hare() {
        assert_detected(&["foo.ha"], Some(Language::Hare));
    }

    #[test]
    fn test_detect_c_and_header() {
        assert_detected(&["main.c", "lib.h"], Some(Language::C));
    }

    #[test]
    fn test_detect_rust() {
        assert_detected(&["main.rs"], Some(Language::Rust));
    }

    #[test]
    fn test_detect_csharp() {
        assert_detected(&["Program.cs"], Some(Language::CSharp));
    }

    #[test]
    fn test_detect_unknown() {
        assert_detected(&["foo.py", "foo.js", "Makefile"], None);
    }

    #[test]
    fn test_detect_case_insensitive_extension() {
        // Extensions are lowercased before matching
        assert_detected(&["FOO.C"], Some(Language::C));
        assert_detected(&["main.RS"], Some(Language::Rust));
    }

    #[test]
    fn test_all_languages() {
        let supported = Language::all();
        assert_eq!(supported.len(), 10);
    }

    #[test]
    fn test_find_compiler_c_available() {
        // clang or gcc is expected in any dev environment
        let lookup = find_compiler(Language::C);
        assert!(
            lookup.is_some(),
            "expected a C compiler (clang/gcc) to be on PATH"
        );
        let compiler = lookup.and_then(|info| info.compiler);
        assert!(compiler.is_some());
    }

    #[test]
    fn test_find_compiler_returns_version() {
        // version is best-effort; just ensure the field exists (may be None on exotic setups)
        let _version = find_compiler(Language::C).and_then(|info| info.version);
    }

    #[test]
    fn test_to_c_args_c_preprocess() {
        let args = Language::C.to_c_args("foo.c", "foo.i");
        for expected in ["-E", "foo.c", "foo.i"] {
            assert!(args.iter().any(|arg| arg.as_str() == expected));
        }
    }

    #[test]
    fn test_to_c_args_zig_no_duplicate_flag() {
        let args = Language::Zig.to_c_args("foo.zig", "foo.o");
        assert!(args.iter().any(|arg| arg.as_str() == "build-obj"));

        let emit_flags = args
            .iter()
            .filter(|arg| arg.starts_with("-femit-bin"))
            .count();
        assert_eq!(emit_flags, 1, "should have exactly one -femit-bin flag");
    }

    #[test]
    fn test_scan_directory_empty() {
        let root = tempdir().unwrap();
        assert!(scan_directory(root.path()).is_empty());
    }

    #[test]
    fn test_scan_directory_finds_sources() {
        let root = tempdir().unwrap();
        // README.md is not a source file and must not be reported.
        for file_name in ["lib.c", "lib.v", "README.md"] {
            std::fs::write(root.path().join(file_name), "").unwrap();
        }

        let found = scan_directory(root.path());
        assert_eq!(found.len(), 2);
        for expected in [Language::C, Language::V] {
            assert!(found.iter().any(|(_, lang)| *lang == expected));
        }
    }

    #[test]
    fn test_scan_directory_skips_target() {
        let root = tempdir().unwrap();
        let skipped = root.path().join("target");
        std::fs::create_dir(&skipped).unwrap();
        // Lives under `target`, so it should be skipped.
        std::fs::write(skipped.join("generated.c"), "").unwrap();
        std::fs::write(root.path().join("main.rs"), "").unwrap();

        let found = scan_directory(root.path());
        assert_eq!(found.len(), 1);
        assert_eq!(
            found.first().map(|(_, lang)| *lang),
            Some(Language::Rust)
        );
    }

    #[test]
    fn test_scan_directory_recurses() {
        let root = tempdir().unwrap();
        let nested = root.path().join("src");
        std::fs::create_dir(&nested).unwrap();
        std::fs::write(nested.join("lib.zig"), "").unwrap();

        let found = scan_directory(root.path());
        assert_eq!(found.len(), 1);
        assert_eq!(
            found.first().map(|(_, lang)| *lang),
            Some(Language::Zig)
        );
    }
}