1use serde::{Deserialize, Serialize};
7use std::path::Path;
8
9#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
11#[serde(rename_all = "lowercase")]
12pub enum Language {
13 Rust,
15 TypeScript,
17 Python,
19 Go,
21 Java,
23 Kotlin,
25 Dart,
27 Unknown,
29}
30
31impl Language {
32 pub fn from_extension(ext: &str) -> Self {
50 match ext.to_lowercase().as_str() {
51 "rs" => Language::Rust,
52 "ts" | "tsx" | "js" | "jsx" => Language::TypeScript,
53 "py" => Language::Python,
54 "go" => Language::Go,
55 "java" => Language::Java,
56 "kt" | "kts" => Language::Kotlin,
57 "dart" => Language::Dart,
58 _ => Language::Unknown,
59 }
60 }
61
62 pub fn extensions(&self) -> &'static [&'static str] {
75 match self {
76 Language::Rust => &["rs"],
77 Language::TypeScript => &["ts", "tsx", "js", "jsx"],
78 Language::Python => &["py"],
79 Language::Go => &["go"],
80 Language::Java => &["java"],
81 Language::Kotlin => &["kt", "kts"],
82 Language::Dart => &["dart"],
83 Language::Unknown => &[],
84 }
85 }
86
87 pub fn as_str(&self) -> &'static str {
100 match self {
101 Language::Rust => "rust",
102 Language::TypeScript => "typescript",
103 Language::Python => "python",
104 Language::Go => "go",
105 Language::Java => "java",
106 Language::Kotlin => "kotlin",
107 Language::Dart => "dart",
108 Language::Unknown => "unknown",
109 }
110 }
111}
112
113pub struct LanguageDetector;
118
119impl LanguageDetector {
120 pub fn from_extension(path: &Path) -> Language {
137 path.extension()
138 .and_then(|ext| ext.to_str())
139 .map(Language::from_extension)
140 .unwrap_or(Language::Unknown)
141 }
142
143 pub fn from_content(content: &str) -> Language {
160 if let Some(first_line) = content.lines().next() {
162 if first_line.starts_with("#!") {
163 if first_line.contains("python") {
164 return Language::Python;
165 } else if first_line.contains("node") || first_line.contains("ts-node") {
166 return Language::TypeScript;
167 } else if first_line.contains("ruby") {
168 return Language::Unknown; } else if first_line.contains("bash") || first_line.contains("sh") {
170 return Language::Unknown; }
172 }
173 }
174
175 if content.contains("package ") && content.contains("func ") {
179 return Language::Go;
180 }
181
182 if content.contains("public class ") || content.contains("public interface ") {
184 return Language::Java;
185 }
186
187 if content.contains("void main()") {
189 return Language::Dart;
190 }
191
192 if content.contains("def ") {
194 return Language::Python;
195 }
196
197 if content.contains("use ") {
199 return Language::Rust;
200 }
201
202 if content.contains("export ") {
204 return Language::TypeScript;
205 }
206
207 if content.contains("import ") {
209 return Language::TypeScript;
210 }
211
212 if content.contains("fun ")
214 && (content.contains("class ")
215 || content.contains("object ")
216 || content.contains("interface "))
217 {
218 return Language::Kotlin;
219 }
220
221 if content.contains("fn ") {
223 return Language::Rust;
224 }
225
226 if content.contains("fun ") {
228 return Language::Kotlin;
229 }
230
231 Language::Unknown
232 }
233
234 pub fn detect(path: &Path, content: &str) -> Language {
255 let from_ext = Self::from_extension(path);
256 if from_ext != Language::Unknown {
257 return from_ext;
258 }
259 Self::from_content(content)
260 }
261}
262
#[cfg(test)]
mod tests {
    use super::*;

    /// Every known extension string maps to its language; anything else is `Unknown`.
    #[test]
    fn test_language_from_extension() {
        let cases = [
            ("rs", Language::Rust),
            ("ts", Language::TypeScript),
            ("tsx", Language::TypeScript),
            ("js", Language::TypeScript),
            ("jsx", Language::TypeScript),
            ("py", Language::Python),
            ("go", Language::Go),
            ("java", Language::Java),
            ("kt", Language::Kotlin),
            ("kts", Language::Kotlin),
            ("dart", Language::Dart),
            ("unknown", Language::Unknown),
        ];

        for &(ext, expected) in cases.iter() {
            assert_eq!(Language::from_extension(ext), expected);
        }
    }

    /// Each language reports exactly its own extension list.
    #[test]
    fn test_language_extensions() {
        let cases: [(Language, &[&str]); 8] = [
            (Language::Rust, &["rs"]),
            (Language::TypeScript, &["ts", "tsx", "js", "jsx"]),
            (Language::Python, &["py"]),
            (Language::Go, &["go"]),
            (Language::Java, &["java"]),
            (Language::Kotlin, &["kt", "kts"]),
            (Language::Dart, &["dart"]),
            (Language::Unknown, &[]),
        ];

        for &(lang, expected) in cases.iter() {
            assert_eq!(lang.extensions(), expected);
        }
    }

    /// Each language reports its lowercase name.
    #[test]
    fn test_language_as_str() {
        let cases = [
            (Language::Rust, "rust"),
            (Language::TypeScript, "typescript"),
            (Language::Python, "python"),
            (Language::Go, "go"),
            (Language::Java, "java"),
            (Language::Kotlin, "kotlin"),
            (Language::Dart, "dart"),
            (Language::Unknown, "unknown"),
        ];

        for &(lang, expected) in cases.iter() {
            assert_eq!(lang.as_str(), expected);
        }
    }

    /// The detector resolves the language from a path's extension.
    #[test]
    fn test_language_detector_from_extension() {
        let cases = [
            ("test.rs", Language::Rust),
            ("test.ts", Language::TypeScript),
            ("test.py", Language::Python),
            ("test.go", Language::Go),
            ("test.java", Language::Java),
            ("test.kt", Language::Kotlin),
            ("test.dart", Language::Dart),
            ("test.unknown", Language::Unknown),
        ];

        for &(file, expected) in cases.iter() {
            assert_eq!(LanguageDetector::from_extension(Path::new(file)), expected);
        }
    }

    /// A shebang on the first line decides the language.
    #[test]
    fn test_language_detector_from_content_shebang() {
        let cases = [
            ("#!/usr/bin/env python\nprint('hello')", Language::Python),
            ("#!/usr/bin/env node\nconsole.log('hello')", Language::TypeScript),
        ];

        for &(script, expected) in cases.iter() {
            assert_eq!(LanguageDetector::from_content(script), expected);
        }
    }

    /// Characteristic keywords decide the language when there is no shebang.
    #[test]
    fn test_language_detector_from_content_patterns() {
        let cases = [
            ("use std::io;\nfn main() {}", Language::Rust),
            ("package main\nfunc main() {}", Language::Go),
            ("public class Main {}", Language::Java),
            ("fun main() {}", Language::Kotlin),
            ("void main() {}", Language::Dart),
            (
                "import { foo } from 'bar';\nexport const x = 1;",
                Language::TypeScript,
            ),
            ("import os\ndef hello():\n pass", Language::Python),
        ];

        for &(snippet, expected) in cases.iter() {
            assert_eq!(LanguageDetector::from_content(snippet), expected);
        }
    }

    /// `detect` uses the extension when it is known and the content otherwise.
    #[test]
    fn test_language_detector_combined() {
        let cases = [
            ("test.rs", "fn main() {}", Language::Rust),
            ("test.unknown", "fn main() {}", Language::Rust),
        ];

        for &(file, content, expected) in cases.iter() {
            assert_eq!(LanguageDetector::detect(Path::new(file), content), expected);
        }
    }
}