Skip to main content

codesearch/file/
language.rs

1use std::path::Path;
2
/// Languages that can be detected from a file extension.
///
/// Detection is purely extension-based (see [`Language::from_extension`]);
/// any extension without a mapping becomes [`Language::Unknown`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    Rust,
    Python,
    JavaScript,
    /// Also assigned to `.tsx` and `.jsx` files.
    TypeScript,
    Go,
    Java,
    /// Includes `.h` headers, which are always classified as C.
    C,
    Cpp,
    CSharp,
    Ruby,
    Php,
    Swift,
    Kotlin,
    Shell,
    /// Also assigned to plain `.txt` files.
    Markdown,
    Json,
    Yaml,
    Toml,
    Sql,
    Html,
    /// Also assigned to `.scss`, `.sass` and `.less` files.
    Css,
    /// Fallback for extensions with no mapping; the only variant for which
    /// [`Language::is_indexable`] returns `false`.
    Unknown,
}

impl Language {
    /// Detect the language of `path` from its file extension.
    ///
    /// A path with no extension, or whose extension is not valid UTF-8, is
    /// treated as having an empty extension and yields [`Language::Unknown`].
    pub fn from_path(path: &Path) -> Self {
        let extension = path.extension().and_then(|e| e.to_str()).unwrap_or("");

        Self::from_extension(extension)
    }

    /// Detect the language from an extension string.
    ///
    /// `ext` is the bare extension without a leading dot (`"rs"`, not
    /// `".rs"`). Matching is case-insensitive. Unrecognised extensions,
    /// including the empty string, return [`Language::Unknown`].
    pub fn from_extension(ext: &str) -> Self {
        match ext.to_lowercase().as_str() {
            "rs" => Self::Rust,
            "py" | "pyw" | "pyi" => Self::Python,
            "js" | "mjs" | "cjs" => Self::JavaScript,
            "ts" | "mts" | "cts" => Self::TypeScript,
            "tsx" | "jsx" => Self::TypeScript, // Treat JSX/TSX as TypeScript
            "go" => Self::Go,
            "java" => Self::Java,
            "c" | "h" => Self::C,
            // Source suffixes followed by their header counterparts. `hh`,
            // `c++` and `h++` are included so that headers of `.cc` sources
            // (and the `++` spellings) are not reported as Unknown.
            "cpp" | "cc" | "cxx" | "c++" | "hpp" | "hh" | "hxx" | "h++" => Self::Cpp,
            "cs" => Self::CSharp,
            "rb" | "rake" => Self::Ruby,
            "php" => Self::Php,
            "swift" => Self::Swift,
            "kt" | "kts" => Self::Kotlin,
            "sh" | "bash" | "zsh" => Self::Shell,
            "md" | "markdown" | "txt" => Self::Markdown, // Treat txt as markdown-like
            "json" => Self::Json,
            "yaml" | "yml" => Self::Yaml,
            "toml" => Self::Toml,
            "sql" => Self::Sql,
            "html" | "htm" => Self::Html,
            "css" | "scss" | "sass" | "less" => Self::Css,
            _ => Self::Unknown,
        }
    }

    /// Check if this language is supported for semantic chunking.
    ///
    /// Returns `true` for Rust, Python, JavaScript, TypeScript, C, C++, C#,
    /// Go and Java; `false` for every other variant.
    #[allow(dead_code)] // Reserved for tree-sitter chunking feature
    pub fn supports_tree_sitter(&self) -> bool {
        matches!(
            self,
            Self::Rust
                | Self::Python
                | Self::JavaScript
                | Self::TypeScript
                | Self::C
                | Self::Cpp
                | Self::CSharp
                | Self::Go
                | Self::Java
        )
    }

    /// Check if this is a text-based language (should be indexed).
    ///
    /// Every variant except [`Language::Unknown`] is indexable.
    pub fn is_indexable(&self) -> bool {
        !matches!(self, Self::Unknown)
    }

    /// Get the language's display name as a static string
    /// (for example `"C++"` for [`Language::Cpp`]).
    pub fn name(&self) -> &'static str {
        match self {
            Self::Rust => "Rust",
            Self::Python => "Python",
            Self::JavaScript => "JavaScript",
            Self::TypeScript => "TypeScript",
            Self::Go => "Go",
            Self::Java => "Java",
            Self::C => "C",
            Self::Cpp => "C++",
            Self::CSharp => "C#",
            Self::Ruby => "Ruby",
            Self::Php => "PHP",
            Self::Swift => "Swift",
            Self::Kotlin => "Kotlin",
            Self::Shell => "Shell",
            Self::Markdown => "Markdown",
            Self::Json => "JSON",
            Self::Yaml => "YAML",
            Self::Toml => "TOML",
            Self::Sql => "SQL",
            Self::Html => "HTML",
            Self::Css => "CSS",
            Self::Unknown => "Unknown",
        }
    }
}
117
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// `rs` resolves to Rust both from a bare extension and from a full path.
    #[test]
    fn test_rust_detection() {
        let via_extension = Language::from_extension("rs");
        assert_eq!(via_extension, Language::Rust);

        let source_file = PathBuf::from("main.rs");
        assert_eq!(Language::from_path(&source_file), Language::Rust);
    }

    /// Regular sources and stub files are both classified as Python.
    #[test]
    fn test_python_detection() {
        for &ext in &["py", "pyi"] {
            assert_eq!(Language::from_extension(ext), Language::Python);
        }
    }

    /// TypeScript sources and the JSX/TSX flavours share the TypeScript variant.
    #[test]
    fn test_typescript_detection() {
        for &ext in &["ts", "tsx", "jsx"] {
            assert_eq!(Language::from_extension(ext), Language::TypeScript);
        }
    }

    /// Code languages report tree-sitter support; data/markup formats do not.
    #[test]
    fn test_tree_sitter_support() {
        for lang in &[Language::Rust, Language::Python, Language::TypeScript] {
            assert!(lang.supports_tree_sitter());
        }
        for lang in &[Language::Markdown, Language::Json] {
            assert!(!lang.supports_tree_sitter());
        }
    }

    /// Every recognised language is indexable; only `Unknown` is excluded.
    #[test]
    fn test_indexable() {
        for lang in &[Language::Rust, Language::Markdown] {
            assert!(lang.is_indexable());
        }
        assert!(!Language::Unknown.is_indexable());
    }
}