Skip to main content

run/
detect.rs

1use std::borrow::Cow;
2
3use once_cell::sync::Lazy;
4use regex::Regex;
5
6pub fn detect_language_from_snippet(code: &str) -> Option<&'static str> {
7    let normalized = if code.contains("\r\n") {
8        Cow::Owned(code.replace("\r\n", "\n"))
9    } else {
10        Cow::Borrowed(code)
11    };
12    let trimmed = normalized.trim_start();
13    if trimmed.is_empty() {
14        return None;
15    }
16
17    // Detection is intentionally first-match-wins. Keep this order stable so
18    // ambiguous snippets resolve deterministically across releases.
19    if PYTHON_SIGNATURE.is_match(trimmed) {
20        return Some("python");
21    }
22    if RUST_SIGNATURE.is_match(trimmed) {
23        return Some("rust");
24    }
25    if GO_SIGNATURE.is_match(trimmed) {
26        return Some("go");
27    }
28    if C_SHARP_SIGNATURE.is_match(trimmed) {
29        return Some("csharp");
30    }
31    if CPP_SIGNATURE.is_match(trimmed) {
32        return Some("cpp");
33    }
34    if C_SIGNATURE.is_match(trimmed) {
35        return Some("c");
36    }
37    if JAVA_SIGNATURE.is_match(trimmed) {
38        return Some("java");
39    }
40    if GROOVY_SIGNATURE.is_match(trimmed) {
41        return Some("groovy");
42    }
43    if TYPESCRIPT_SIGNATURE.is_match(trimmed) {
44        return Some("typescript");
45    }
46    if JAVASCRIPT_SIGNATURE.is_match(trimmed) {
47        return Some("javascript");
48    }
49    if RUBY_SIGNATURE.is_match(trimmed) {
50        return Some("ruby");
51    }
52    if KOTLIN_SIGNATURE.is_match(trimmed) {
53        return Some("kotlin");
54    }
55    if PHP_SIGNATURE.is_match(trimmed) {
56        return Some("php");
57    }
58    if LUA_SIGNATURE.is_match(trimmed) {
59        return Some("lua");
60    }
61    if BASH_SIGNATURE.is_match(trimmed) {
62        return Some("bash");
63    }
64    if R_SIGNATURE.is_match(trimmed) {
65        return Some("r");
66    }
67    if DART_SIGNATURE.is_match(trimmed) {
68        return Some("dart");
69    }
70    if SWIFT_SIGNATURE.is_match(trimmed) {
71        return Some("swift");
72    }
73    if PERL_SIGNATURE.is_match(trimmed) {
74        return Some("perl");
75    }
76    if JULIA_SIGNATURE.is_match(trimmed) {
77        return Some("julia");
78    }
79    if HASKELL_SIGNATURE.is_match(trimmed) {
80        return Some("haskell");
81    }
82    if ELIXIR_SIGNATURE.is_match(trimmed) {
83        return Some("elixir");
84    }
85    if CRYSTAL_SIGNATURE.is_match(trimmed) {
86        return Some("crystal");
87    }
88    if ZIG_SIGNATURE.is_match(trimmed) {
89        return Some("zig");
90    }
91    if NIM_SIGNATURE.is_match(trimmed) {
92        return Some("nim");
93    }
94
95    None
96}
97
98static PYTHON_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
99    Regex::new(
100        r"(?m)^(from\s+[\w\.]+\s+import|import\s+[\w\.]+|def\s+[A-Za-z_][\w]*\(|class\s+[A-Za-z_])",
101    )
102    .expect("valid python regex")
103});
104
105static RUST_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
106    Regex::new(r#"(?m)^(fn\s+main\s*\(|use\s+[\w:]+::|#!\[[^\n]+\]|mod\s+[A-Za-z_])"#)
107        .expect("valid rust regex")
108});
109
110static GO_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
111    Regex::new(r#"(?m)^(package\s+main|func\s+main\s*\(|import\s+(?:\w+\s+)?"[^"]+")"#)
112        .expect("valid go regex")
113});
114
115static C_SHARP_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
116    Regex::new(r#"(?m)^(using\s+System|namespace\s+[A-Za-z_][\w\.]*\s*\{|class\s+[A-Za-z_][\w]*\s*\{|\[assembly:)"#)
117        .expect("valid csharp regex")
118});
119
120static C_SIGNATURE: Lazy<Regex> =
121    Lazy::new(|| Regex::new(r#"(?m)^(#include\s+<|int\s+main\s*\()"#).expect("valid c regex"));
122
123static CPP_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
124    Regex::new(r#"(?m)^(?:#include\s+<[^>]+>|using\s+namespace\s+std;|std::|int\s+main\s*\()"#)
125        .expect("valid cpp regex")
126});
127
128static JAVA_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
129    Regex::new(r#"(?m)^(package\s+[\w\.]+;|import\s+java\.|public\s+class\s+|class\s+\w+\s*\{\s*\n\s*public\s+static\s+void\s+main)"#)
130        .expect("valid java regex")
131});
132
133static GROOVY_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
134    Regex::new(
135        r#"(?m)^(?:@Grab|@Grapes|println\s|def\s+\w+\s*=|import\s+groovy\.|class\s+\w+\s*\{|package\s+[\w\.]+)"#,
136    )
137    .expect("valid groovy regex")
138});
139
140static TYPESCRIPT_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
141    Regex::new(r"(?m)^(import\s+\{|type\s+\w+\s*=|interface\s+\w+|class\s+\w+\s+implements)")
142        .expect("valid ts regex")
143});
144
145static JAVASCRIPT_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
146    Regex::new(r#"(?m)^(import\s+(?:\w+\s+from\s+)?["']|console\.log|function\s+\w+\s*\(|module\.exports)"#)
147        .expect("valid js regex")
148});
149
150static RUBY_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
151    Regex::new(r#"(?m)^(require\s+['"]|class\s+\w+|module\s+\w+|puts\s)"#)
152        .expect("valid ruby regex")
153});
154
155static KOTLIN_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
156    Regex::new(r#"(?m)^(package\s+[\w\.]+|import\s+|fun\s+main\s*\(|val\s+\w+\s*=)"#)
157        .expect("valid kotlin regex")
158});
159
160static PHP_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
161    Regex::new(r#"(?m)^(?:<\?php|echo\s+['"]|function\s+\w+\s*\()"#).expect("valid php regex")
162});
163
164static LUA_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
165    Regex::new(r#"(?m)^(local\s+function|function\s+\w+|print\s*\(|--\s)"#)
166        .expect("valid lua regex")
167});
168
169static BASH_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
170    Regex::new(r#"(?m)(^#!\s*/(?:usr/)?bin/(?:env\s+)?(?:bash|sh)|^(?:echo|export|read)\s+|\$\([\w\s]+\))"#)
171        .expect("valid bash regex")
172});
173
174static R_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
175    Regex::new(r"(?m)^(library\(|require\(|print\(|cat\(|[A-Za-z_][\w.]*\s*<-|#[^!]|plot\()")
176        .expect("valid r regex")
177});
178
179static DART_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
180    Regex::new(r"(?m)^(import\s+'dart:|void\s+main\s*\(|class\s+\w+\s*\{|@override)")
181        .expect("valid dart regex")
182});
183
184static SWIFT_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
185    Regex::new(
186        r"(?m)^(import\s+Foundation|func\s+main\s*\(|print\(|class\s+\w+\s*:|struct\s+\w+\s*\{)",
187    )
188    .expect("valid swift regex")
189});
190
191static PERL_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
192    Regex::new(
193        r"(?m)^(?:#!\s*/(?:usr/)?bin/(?:env\s+)?perl|use\s+(?:strict|warnings|feature)\b|my\s+\$|our\s+\$|sub\s+\w|print\s|say\s)"
194    )
195        .expect("valid perl regex")
196});
197
198static JULIA_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
199    Regex::new(
200        r"(?m)^(using\s+|import\s+|function\s+\w|println\(|struct\s+\w|mutable\s+struct\s+\w)",
201    )
202    .expect("valid julia regex")
203});
204
205static HASKELL_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
206    Regex::new(
207        r"(?m)^(module\s+\w+\s+where|import\s+[A-Z][\w\.]*|main\s*::\s*IO\s*\(|main\s*=|data\s+\w+\s*=|type\s+\w+|class\s+\w+|^\s*let\s+\w+\s*=)",
208    )
209    .expect("valid haskell regex")
210});
211
212static ELIXIR_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
213    Regex::new(
214        r"(?m)^(defmodule\s+[A-Z][\w\.]*|defp?\s+\w+\s*\(|IO\.puts|IO\.inspect|alias\s+[A-Z][\w\.]*|use\s+[A-Z][\w\.]*|require\s+[A-Z][\w\.]*)",
215    )
216    .expect("valid elixir regex")
217});
218
219static CRYSTAL_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
220    Regex::new(
221        r"(?m)^(?:@[A-Z][\w]*(?:\([^)]*\))?|struct\s+\w+|enum\s+\w+|record\s+\w+|macro\s+\w+|def\s+\w+\s*(?:\([^)]*\))?\s*:\s*[A-Z])",
222    )
223    .expect("valid crystal regex")
224});
225
226static ZIG_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
227    Regex::new(
228        r#"(?m)^(const\s+\w+\s*=\s*@import\("std"\)|pub\s+fn\s+main\s*\(|fn\s+main\s*\(\)\s*!?void)"#,
229    )
230    .expect("valid zig regex")
231});
232
233static NIM_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
234    Regex::new(
235        r"(?m)^(proc\s+\w+\s*\(|import\s+[\w/]+|echo\s+|let\s+\w+\s*=|var\s+\w+\s*:\s*|template\s+\w+\s*\()",
236    )
237    .expect("valid nim regex")
238});
239
#[cfg(test)]
mod tests {
    use super::detect_language_from_snippet;

    /// A bash shebang is still recognised when the snippet uses CRLF endings.
    #[test]
    fn detection_handles_crlf_shebang() {
        let snippet = "#!/usr/bin/env bash\r\necho hi\r\n";
        let detected = detect_language_from_snippet(snippet);
        assert_eq!(detected, Some("bash"));
    }

    /// `print(` appears in several signatures; repeated calls must keep
    /// resolving to the same language.
    #[test]
    fn ambiguous_detection_is_deterministic() {
        let snippet = "print(\"hello\")";
        for _ in 0..2 {
            assert_eq!(detect_language_from_snippet(snippet), Some("lua"));
        }
    }
}