ricecoder_completion/
language.rs

//! Language detection and identification utilities.
//!
//! This module provides language detection for code completion: detection
//! from a file extension, detection from file content, and string
//! identifiers for the supported programming languages.
6use serde::{Deserialize, Serialize};
7use std::path::Path;
8
9/// Supported programming languages for code completion
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
11#[serde(rename_all = "lowercase")]
12pub enum Language {
13    /// Rust programming language
14    Rust,
15    /// TypeScript/JavaScript programming language
16    TypeScript,
17    /// Python programming language
18    Python,
19    /// Go programming language
20    Go,
21    /// Java programming language
22    Java,
23    /// Kotlin programming language
24    Kotlin,
25    /// Dart programming language
26    Dart,
27    /// Unknown or unsupported language
28    Unknown,
29}
30
31impl Language {
32    /// Detect language from file extension
33    ///
34    /// # Arguments
35    ///
36    /// * `ext` - File extension (without the dot)
37    ///
38    /// # Returns
39    ///
40    /// The detected language, or `Language::Unknown` if not recognized
41    ///
42    /// # Example
43    ///
44    /// ```ignore
45    /// assert_eq!(Language::from_extension("rs"), Language::Rust);
46    /// assert_eq!(Language::from_extension("ts"), Language::TypeScript);
47    /// assert_eq!(Language::from_extension("py"), Language::Python);
48    /// ```
49    pub fn from_extension(ext: &str) -> Self {
50        match ext.to_lowercase().as_str() {
51            "rs" => Language::Rust,
52            "ts" | "tsx" | "js" | "jsx" => Language::TypeScript,
53            "py" => Language::Python,
54            "go" => Language::Go,
55            "java" => Language::Java,
56            "kt" | "kts" => Language::Kotlin,
57            "dart" => Language::Dart,
58            _ => Language::Unknown,
59        }
60    }
61
62    /// Get file extensions for this language
63    ///
64    /// # Returns
65    ///
66    /// A slice of file extensions (without dots) for this language
67    ///
68    /// # Example
69    ///
70    /// ```ignore
71    /// assert_eq!(Language::Rust.extensions(), &["rs"]);
72    /// assert_eq!(Language::TypeScript.extensions(), &["ts", "tsx", "js", "jsx"]);
73    /// ```
74    pub fn extensions(&self) -> &'static [&'static str] {
75        match self {
76            Language::Rust => &["rs"],
77            Language::TypeScript => &["ts", "tsx", "js", "jsx"],
78            Language::Python => &["py"],
79            Language::Go => &["go"],
80            Language::Java => &["java"],
81            Language::Kotlin => &["kt", "kts"],
82            Language::Dart => &["dart"],
83            Language::Unknown => &[],
84        }
85    }
86
87    /// Convert language to string identifier
88    ///
89    /// # Returns
90    ///
91    /// A string identifier for this language
92    ///
93    /// # Example
94    ///
95    /// ```ignore
96    /// assert_eq!(Language::Rust.as_str(), "rust");
97    /// assert_eq!(Language::TypeScript.as_str(), "typescript");
98    /// ```
99    pub fn as_str(&self) -> &'static str {
100        match self {
101            Language::Rust => "rust",
102            Language::TypeScript => "typescript",
103            Language::Python => "python",
104            Language::Go => "go",
105            Language::Java => "java",
106            Language::Kotlin => "kotlin",
107            Language::Dart => "dart",
108            Language::Unknown => "unknown",
109        }
110    }
111}
112
113/// Language detection utilities
114///
115/// Provides methods for detecting programming languages from file paths,
116/// file content, or both.
117pub struct LanguageDetector;
118
119impl LanguageDetector {
120    /// Detect language from file extension
121    ///
122    /// # Arguments
123    ///
124    /// * `path` - Path to the file
125    ///
126    /// # Returns
127    ///
128    /// The detected language, or `Language::Unknown` if extension is not recognized
129    ///
130    /// # Example
131    ///
132    /// ```ignore
133    /// let lang = LanguageDetector::from_extension(Path::new("main.rs"));
134    /// assert_eq!(lang, Language::Rust);
135    /// ```
136    pub fn from_extension(path: &Path) -> Language {
137        path.extension()
138            .and_then(|ext| ext.to_str())
139            .map(Language::from_extension)
140            .unwrap_or(Language::Unknown)
141    }
142
143    /// Detect language from file content (shebang or imports)
144    ///
145    /// # Arguments
146    ///
147    /// * `content` - The file content to analyze
148    ///
149    /// # Returns
150    ///
151    /// The detected language based on content patterns, or `Language::Unknown`
152    ///
153    /// # Example
154    ///
155    /// ```ignore
156    /// let lang = LanguageDetector::from_content("#!/usr/bin/env python\nprint('hello')");
157    /// assert_eq!(lang, Language::Python);
158    /// ```
159    pub fn from_content(content: &str) -> Language {
160        // Check for shebang
161        if let Some(first_line) = content.lines().next() {
162            if first_line.starts_with("#!") {
163                if first_line.contains("python") {
164                    return Language::Python;
165                } else if first_line.contains("node") || first_line.contains("ts-node") {
166                    return Language::TypeScript;
167                } else if first_line.contains("ruby") {
168                    return Language::Unknown; // Ruby not supported yet
169                } else if first_line.contains("bash") || first_line.contains("sh") {
170                    return Language::Unknown; // Shell not supported yet
171                }
172            }
173        }
174
175        // Check for language-specific patterns (order matters - most specific first)
176
177        // Check Go (package + func is very specific to Go)
178        if content.contains("package ") && content.contains("func ") {
179            return Language::Go;
180        }
181
182        // Check Java (public class/interface is very specific to Java)
183        if content.contains("public class ") || content.contains("public interface ") {
184            return Language::Java;
185        }
186
187        // Check Dart (void main() is very specific to Dart)
188        if content.contains("void main()") {
189            return Language::Dart;
190        }
191
192        // Check Python (def is Python-specific)
193        if content.contains("def ") {
194            return Language::Python;
195        }
196
197        // Check Rust (use is very specific to Rust)
198        if content.contains("use ") {
199            return Language::Rust;
200        }
201
202        // Check TypeScript/JavaScript (export is more specific than import)
203        if content.contains("export ") {
204            return Language::TypeScript;
205        }
206
207        // Check for import statements (generic, but TypeScript/JS specific in context)
208        if content.contains("import ") {
209            return Language::TypeScript;
210        }
211
212        // Check Kotlin (fun is Kotlin-specific when combined with class/object/interface)
213        if content.contains("fun ")
214            && (content.contains("class ")
215                || content.contains("object ")
216                || content.contains("interface "))
217        {
218            return Language::Kotlin;
219        }
220
221        // Fallback: if we see fn, assume Rust (fn is Rust-specific)
222        if content.contains("fn ") {
223            return Language::Rust;
224        }
225
226        // Fallback: if we see fun, assume Kotlin (fun is Kotlin-specific)
227        if content.contains("fun ") {
228            return Language::Kotlin;
229        }
230
231        Language::Unknown
232    }
233
234    /// Detect language from both extension and content
235    ///
236    /// # Arguments
237    ///
238    /// * `path` - Path to the file
239    /// * `content` - The file content to analyze
240    ///
241    /// # Returns
242    ///
243    /// The detected language, preferring extension detection over content detection
244    ///
245    /// # Example
246    ///
247    /// ```ignore
248    /// let lang = LanguageDetector::detect(
249    ///     Path::new("main.rs"),
250    ///     "fn main() {}"
251    /// );
252    /// assert_eq!(lang, Language::Rust);
253    /// ```
254    pub fn detect(path: &Path, content: &str) -> Language {
255        let from_ext = Self::from_extension(path);
256        if from_ext != Language::Unknown {
257            return from_ext;
258        }
259        Self::from_content(content)
260    }
261}
262
#[cfg(test)]
mod tests {
    use super::*;

    /// Extension-based detection on a bare file name.
    fn detect_by_name(file_name: &str) -> Language {
        LanguageDetector::from_extension(Path::new(file_name))
    }

    /// Asserts that `lang` reports exactly the `expected` extension list.
    fn assert_extensions(lang: Language, expected: &[&str]) {
        assert_eq!(lang.extensions(), expected);
    }

    #[test]
    fn test_language_from_extension() {
        // (extension, language it must resolve to)
        let cases: &[(&str, Language)] = &[
            ("rs", Language::Rust),
            ("ts", Language::TypeScript),
            ("tsx", Language::TypeScript),
            ("js", Language::TypeScript),
            ("jsx", Language::TypeScript),
            ("py", Language::Python),
            ("go", Language::Go),
            ("java", Language::Java),
            ("kt", Language::Kotlin),
            ("kts", Language::Kotlin),
            ("dart", Language::Dart),
            ("unknown", Language::Unknown),
        ];

        for &(ext, expected) in cases {
            assert_eq!(Language::from_extension(ext), expected, "extension {:?}", ext);
        }
    }

    #[test]
    fn test_language_extensions() {
        assert_extensions(Language::Rust, &["rs"]);
        assert_extensions(Language::TypeScript, &["ts", "tsx", "js", "jsx"]);
        assert_extensions(Language::Python, &["py"]);
        assert_extensions(Language::Go, &["go"]);
        assert_extensions(Language::Java, &["java"]);
        assert_extensions(Language::Kotlin, &["kt", "kts"]);
        assert_extensions(Language::Dart, &["dart"]);
        assert_extensions(Language::Unknown, &[]);
    }

    #[test]
    fn test_language_as_str() {
        // (language, identifier it must report)
        let cases: &[(Language, &str)] = &[
            (Language::Rust, "rust"),
            (Language::TypeScript, "typescript"),
            (Language::Python, "python"),
            (Language::Go, "go"),
            (Language::Java, "java"),
            (Language::Kotlin, "kotlin"),
            (Language::Dart, "dart"),
            (Language::Unknown, "unknown"),
        ];

        for &(lang, expected) in cases {
            assert_eq!(lang.as_str(), expected);
        }
    }

    #[test]
    fn test_language_detector_from_extension() {
        // (file name, language its extension must resolve to)
        let cases: &[(&str, Language)] = &[
            ("test.rs", Language::Rust),
            ("test.ts", Language::TypeScript),
            ("test.py", Language::Python),
            ("test.go", Language::Go),
            ("test.java", Language::Java),
            ("test.kt", Language::Kotlin),
            ("test.dart", Language::Dart),
            ("test.unknown", Language::Unknown),
        ];

        for &(file_name, expected) in cases {
            assert_eq!(detect_by_name(file_name), expected, "file {:?}", file_name);
        }
    }

    #[test]
    fn test_language_detector_from_content_shebang() {
        // (source starting with a shebang, language the interpreter implies)
        let cases: &[(&str, Language)] = &[
            ("#!/usr/bin/env python\nprint('hello')", Language::Python),
            ("#!/usr/bin/env node\nconsole.log('hello')", Language::TypeScript),
        ];

        for &(source, expected) in cases {
            assert_eq!(
                LanguageDetector::from_content(source),
                expected,
                "source {:?}",
                source
            );
        }
    }

    #[test]
    fn test_language_detector_from_content_patterns() {
        // (source snippet, language its keywords imply)
        let cases: &[(&str, Language)] = &[
            ("use std::io;\nfn main() {}", Language::Rust),
            ("package main\nfunc main() {}", Language::Go),
            ("public class Main {}", Language::Java),
            ("fun main() {}", Language::Kotlin),
            ("void main() {}", Language::Dart),
            (
                "import { foo } from 'bar';\nexport const x = 1;",
                Language::TypeScript,
            ),
            ("import os\ndef hello():\n    pass", Language::Python),
        ];

        for &(source, expected) in cases {
            assert_eq!(
                LanguageDetector::from_content(source),
                expected,
                "source {:?}",
                source
            );
        }
    }

    #[test]
    fn test_language_detector_combined() {
        let source = "fn main() {}";

        // A known extension decides on its own.
        assert_eq!(
            LanguageDetector::detect(Path::new("test.rs"), source),
            Language::Rust
        );

        // An unknown extension falls back to content detection.
        assert_eq!(
            LanguageDetector::detect(Path::new("test.unknown"), source),
            Language::Rust
        );
    }
}