1use std::borrow::Cow;
2
3use once_cell::sync::Lazy;
4use regex::Regex;
5
6pub fn detect_language_from_snippet(code: &str) -> Option<&'static str> {
7 let normalized = if code.contains("\r\n") {
8 Cow::Owned(code.replace("\r\n", "\n"))
9 } else {
10 Cow::Borrowed(code)
11 };
12 let trimmed = normalized.trim_start();
13 if trimmed.is_empty() {
14 return None;
15 }
16
17 if PYTHON_SIGNATURE.is_match(trimmed) {
20 return Some("python");
21 }
22 if RUST_SIGNATURE.is_match(trimmed) {
23 return Some("rust");
24 }
25 if GO_SIGNATURE.is_match(trimmed) {
26 return Some("go");
27 }
28 if C_SHARP_SIGNATURE.is_match(trimmed) {
29 return Some("csharp");
30 }
31 if CPP_SIGNATURE.is_match(trimmed) {
32 return Some("cpp");
33 }
34 if C_SIGNATURE.is_match(trimmed) {
35 return Some("c");
36 }
37 if JAVA_SIGNATURE.is_match(trimmed) {
38 return Some("java");
39 }
40 if GROOVY_SIGNATURE.is_match(trimmed) {
41 return Some("groovy");
42 }
43 if TYPESCRIPT_SIGNATURE.is_match(trimmed) {
44 return Some("typescript");
45 }
46 if JAVASCRIPT_SIGNATURE.is_match(trimmed) {
47 return Some("javascript");
48 }
49 if RUBY_SIGNATURE.is_match(trimmed) {
50 return Some("ruby");
51 }
52 if KOTLIN_SIGNATURE.is_match(trimmed) {
53 return Some("kotlin");
54 }
55 if PHP_SIGNATURE.is_match(trimmed) {
56 return Some("php");
57 }
58 if LUA_SIGNATURE.is_match(trimmed) {
59 return Some("lua");
60 }
61 if BASH_SIGNATURE.is_match(trimmed) {
62 return Some("bash");
63 }
64 if R_SIGNATURE.is_match(trimmed) {
65 return Some("r");
66 }
67 if DART_SIGNATURE.is_match(trimmed) {
68 return Some("dart");
69 }
70 if SWIFT_SIGNATURE.is_match(trimmed) {
71 return Some("swift");
72 }
73 if PERL_SIGNATURE.is_match(trimmed) {
74 return Some("perl");
75 }
76 if JULIA_SIGNATURE.is_match(trimmed) {
77 return Some("julia");
78 }
79 if HASKELL_SIGNATURE.is_match(trimmed) {
80 return Some("haskell");
81 }
82 if ELIXIR_SIGNATURE.is_match(trimmed) {
83 return Some("elixir");
84 }
85 if CRYSTAL_SIGNATURE.is_match(trimmed) {
86 return Some("crystal");
87 }
88 if ZIG_SIGNATURE.is_match(trimmed) {
89 return Some("zig");
90 }
91 if NIM_SIGNATURE.is_match(trimmed) {
92 return Some("nim");
93 }
94
95 None
96}
97
98static PYTHON_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
99 Regex::new(
100 r"(?m)^(from\s+[\w\.]+\s+import|import\s+[\w\.]+|def\s+[A-Za-z_][\w]*\(|class\s+[A-Za-z_])",
101 )
102 .expect("valid python regex")
103});
104
105static RUST_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
106 Regex::new(r#"(?m)^(fn\s+main\s*\(|use\s+[\w:]+::|#!\[[^\n]+\]|mod\s+[A-Za-z_])"#)
107 .expect("valid rust regex")
108});
109
110static GO_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
111 Regex::new(r#"(?m)^(package\s+main|func\s+main\s*\(|import\s+(?:\w+\s+)?"[^"]+")"#)
112 .expect("valid go regex")
113});
114
115static C_SHARP_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
116 Regex::new(r#"(?m)^(using\s+System|namespace\s+[A-Za-z_][\w\.]*\s*\{|class\s+[A-Za-z_][\w]*\s*\{|\[assembly:)"#)
117 .expect("valid csharp regex")
118});
119
120static C_SIGNATURE: Lazy<Regex> =
121 Lazy::new(|| Regex::new(r#"(?m)^(#include\s+<|int\s+main\s*\()"#).expect("valid c regex"));
122
123static CPP_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
124 Regex::new(r#"(?m)^(?:#include\s+<[^>]+>|using\s+namespace\s+std;|std::|int\s+main\s*\()"#)
125 .expect("valid cpp regex")
126});
127
128static JAVA_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
129 Regex::new(r#"(?m)^(package\s+[\w\.]+;|import\s+java\.|public\s+class\s+|class\s+\w+\s*\{\s*\n\s*public\s+static\s+void\s+main)"#)
130 .expect("valid java regex")
131});
132
133static GROOVY_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
134 Regex::new(
135 r#"(?m)^(?:@Grab|@Grapes|println\s|def\s+\w+\s*=|import\s+groovy\.|class\s+\w+\s*\{|package\s+[\w\.]+)"#,
136 )
137 .expect("valid groovy regex")
138});
139
140static TYPESCRIPT_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
141 Regex::new(r"(?m)^(import\s+\{|type\s+\w+\s*=|interface\s+\w+|class\s+\w+\s+implements)")
142 .expect("valid ts regex")
143});
144
145static JAVASCRIPT_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
146 Regex::new(r#"(?m)^(import\s+(?:\w+\s+from\s+)?["']|console\.log|function\s+\w+\s*\(|module\.exports)"#)
147 .expect("valid js regex")
148});
149
150static RUBY_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
151 Regex::new(r#"(?m)^(require\s+['"]|class\s+\w+|module\s+\w+|puts\s)"#)
152 .expect("valid ruby regex")
153});
154
155static KOTLIN_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
156 Regex::new(r#"(?m)^(package\s+[\w\.]+|import\s+|fun\s+main\s*\(|val\s+\w+\s*=)"#)
157 .expect("valid kotlin regex")
158});
159
160static PHP_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
161 Regex::new(r#"(?m)^(?:<\?php|echo\s+['"]|function\s+\w+\s*\()"#).expect("valid php regex")
162});
163
164static LUA_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
165 Regex::new(r#"(?m)^(local\s+function|function\s+\w+|print\s*\(|--\s)"#)
166 .expect("valid lua regex")
167});
168
169static BASH_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
170 Regex::new(r#"(?m)(^#!\s*/(?:usr/)?bin/(?:env\s+)?(?:bash|sh)|^(?:echo|export|read)\s+|\$\([\w\s]+\))"#)
171 .expect("valid bash regex")
172});
173
174static R_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
175 Regex::new(r"(?m)^(library\(|require\(|print\(|cat\(|[A-Za-z_][\w.]*\s*<-|#[^!]|plot\()")
176 .expect("valid r regex")
177});
178
179static DART_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
180 Regex::new(r"(?m)^(import\s+'dart:|void\s+main\s*\(|class\s+\w+\s*\{|@override)")
181 .expect("valid dart regex")
182});
183
184static SWIFT_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
185 Regex::new(
186 r"(?m)^(import\s+Foundation|func\s+main\s*\(|print\(|class\s+\w+\s*:|struct\s+\w+\s*\{)",
187 )
188 .expect("valid swift regex")
189});
190
191static PERL_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
192 Regex::new(
193 r"(?m)^(?:#!\s*/(?:usr/)?bin/(?:env\s+)?perl|use\s+(?:strict|warnings|feature)\b|my\s+\$|our\s+\$|sub\s+\w|print\s|say\s)"
194 )
195 .expect("valid perl regex")
196});
197
198static JULIA_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
199 Regex::new(
200 r"(?m)^(using\s+|import\s+|function\s+\w|println\(|struct\s+\w|mutable\s+struct\s+\w)",
201 )
202 .expect("valid julia regex")
203});
204
205static HASKELL_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
206 Regex::new(
207 r"(?m)^(module\s+\w+\s+where|import\s+[A-Z][\w\.]*|main\s*::\s*IO\s*\(|main\s*=|data\s+\w+\s*=|type\s+\w+|class\s+\w+|^\s*let\s+\w+\s*=)",
208 )
209 .expect("valid haskell regex")
210});
211
212static ELIXIR_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
213 Regex::new(
214 r"(?m)^(defmodule\s+[A-Z][\w\.]*|defp?\s+\w+\s*\(|IO\.puts|IO\.inspect|alias\s+[A-Z][\w\.]*|use\s+[A-Z][\w\.]*|require\s+[A-Z][\w\.]*)",
215 )
216 .expect("valid elixir regex")
217});
218
219static CRYSTAL_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
220 Regex::new(
221 r"(?m)^(?:@[A-Z][\w]*(?:\([^)]*\))?|struct\s+\w+|enum\s+\w+|record\s+\w+|macro\s+\w+|def\s+\w+\s*(?:\([^)]*\))?\s*:\s*[A-Z])",
222 )
223 .expect("valid crystal regex")
224});
225
226static ZIG_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
227 Regex::new(
228 r#"(?m)^(const\s+\w+\s*=\s*@import\("std"\)|pub\s+fn\s+main\s*\(|fn\s+main\s*\(\)\s*!?void)"#,
229 )
230 .expect("valid zig regex")
231});
232
233static NIM_SIGNATURE: Lazy<Regex> = Lazy::new(|| {
234 Regex::new(
235 r"(?m)^(proc\s+\w+\s*\(|import\s+[\w/]+|echo\s+|let\s+\w+\s*=|var\s+\w+\s*:\s*|template\s+\w+\s*\()",
236 )
237 .expect("valid nim regex")
238});
239
#[cfg(test)]
mod tests {
    use super::detect_language_from_snippet;

    /// A bash shebang script with CRLF line endings is still reported as bash.
    #[test]
    fn detection_handles_crlf_shebang() {
        let script = "#!/usr/bin/env bash\r\necho hi\r\n";
        let detected = detect_language_from_snippet(script);
        assert_eq!(detected, Some("bash"));
    }

    /// `print("hello")` satisfies more than one signature (Lua, R and Swift all accept
    /// `print(`); repeated calls must keep returning the same answer.
    #[test]
    fn ambiguous_detection_is_deterministic() {
        let snippet = "print(\"hello\")";
        for _ in 0..2 {
            assert_eq!(detect_language_from_snippet(snippet), Some("lua"));
        }
    }
}