1use crate::{Error, Language};
4use std::path::Path;
5use tree_sitter::Language as TSLanguage;
6
7pub fn get_tree_sitter_language(language: &Language) -> Result<TSLanguage, Error> {
9 match language {
10 #[cfg(feature = "python")]
11 Language::Python => Ok(tree_sitter_python::LANGUAGE.into()),
12 #[cfg(feature = "rust_lang")]
13 Language::Rust => Ok(tree_sitter_rust::LANGUAGE.into()),
14 #[cfg(feature = "javascript")]
15 Language::JavaScript => Ok(tree_sitter_javascript::LANGUAGE.into()),
16 #[cfg(feature = "typescript")]
17 Language::TypeScript => Ok(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
18 #[cfg(feature = "java")]
19 Language::Java => Ok(tree_sitter_java::LANGUAGE.into()),
20 #[cfg(feature = "c")]
21 Language::C => Ok(tree_sitter_c::LANGUAGE.into()),
22 #[cfg(feature = "cpp")]
23 Language::Cpp => Ok(tree_sitter_cpp::LANGUAGE.into()),
24 #[cfg(feature = "go")]
25 Language::Go => Ok(tree_sitter_go::LANGUAGE.into()),
26 _ => Err(Error::UnsupportedLanguage(format!("{:?}", language))),
27 }
28}
29
30pub fn detect_language_by_extension(file_path: &str) -> Option<Language> {
32 let path = Path::new(file_path);
33 let extension = path.extension()?.to_str()?.to_lowercase();
34
35 match extension.as_str() {
36 "py" | "pyw" | "pyi" => Some(Language::Python),
37 "rs" => Some(Language::Rust),
38 "js" | "mjs" | "cjs" => Some(Language::JavaScript),
39 "ts" | "mts" | "cts" => Some(Language::TypeScript),
40 "java" => Some(Language::Java),
41 "c" | "h" => Some(Language::C),
42 "cpp" | "cc" | "cxx" | "c++" | "hpp" | "hh" | "hxx" | "h++" => Some(Language::Cpp),
43 "go" => Some(Language::Go),
44 "cs" => Some(Language::CSharp),
45 "php" | "phtml" | "php3" | "php4" | "php5" | "phps" => Some(Language::Php),
46 "rb" | "rbw" => Some(Language::Ruby),
47 "swift" => Some(Language::Swift),
48 "kt" | "kts" => Some(Language::Kotlin),
49 "scala" | "sc" => Some(Language::Scala),
50 "hs" | "lhs" => Some(Language::Haskell),
51 "lua" => Some(Language::Lua),
52 "pl" | "pm" | "t" | "pod" => Some(Language::Perl),
53 "r" | "R" => Some(Language::R),
54 "sh" | "bash" | "zsh" | "fish" => Some(Language::Bash),
55 "ps1" | "psm1" | "psd1" => Some(Language::PowerShell),
56 "html" | "htm" | "xhtml" => Some(Language::Html),
57 "css" => Some(Language::Css),
58 "sql" => Some(Language::Sql),
59 "json" => Some(Language::Json),
60 "yaml" | "yml" => Some(Language::Yaml),
61 "toml" => Some(Language::Toml),
62 "xml" | "xsd" | "xsl" | "xslt" => Some(Language::Xml),
63 _ => None,
64 }
65}
66
67pub fn detect_language_by_shebang(content: &str) -> Option<Language> {
69 let first_line = content.lines().next()?;
70 if !first_line.starts_with("#!") {
71 return None;
72 }
73
74 let shebang = first_line.to_lowercase();
75
76 if shebang.contains("python") {
77 Some(Language::Python)
78 } else if shebang.contains("node") {
79 Some(Language::JavaScript)
80 } else if shebang.contains("bash") || shebang.contains("/bin/sh") {
81 Some(Language::Bash)
82 } else if shebang.contains("ruby") {
83 Some(Language::Ruby)
84 } else if shebang.contains("perl") {
85 Some(Language::Perl)
86 } else if shebang.contains("php") {
87 Some(Language::Php)
88 } else {
89 None
90 }
91}
92
93pub fn detect_language_by_content(content: &str) -> Option<Language> {
95 let content_lower = content.to_lowercase();
97
98 if content_lower.contains("def ") && content_lower.contains("import ") {
100 return Some(Language::Python);
101 }
102
103 if content_lower.contains("fn ") && content_lower.contains("use ") {
104 return Some(Language::Rust);
105 }
106
107 if content_lower.contains("function ") && content_lower.contains("var ") {
108 return Some(Language::JavaScript);
109 }
110
111 if content_lower.contains("public class ") && content_lower.contains("import ") {
112 return Some(Language::Java);
113 }
114
115 if content_lower.contains("#include") && content_lower.contains("int main") {
116 return Some(Language::C);
117 }
118
119 None
120}
121
122pub fn detect_language(file_path: &str, content: Option<&str>) -> Option<Language> {
124 if let Some(lang) = detect_language_by_extension(file_path) {
126 return Some(lang);
127 }
128
129 if let Some(content) = content {
131 if let Some(lang) = detect_language_by_shebang(content) {
132 return Some(lang);
133 }
134
135 if let Some(lang) = detect_language_by_content(content) {
136 return Some(lang);
137 }
138 }
139
140 None
141}
142
143pub fn get_supported_node_types(language: &Language) -> Vec<String> {
145 match language {
146 Language::Python => vec![
147 "function_definition".to_string(),
148 "class_definition".to_string(),
149 "import_statement".to_string(),
150 "import_from_statement".to_string(),
151 "assignment".to_string(),
152 "decorated_definition".to_string(),
153 ],
154 Language::Rust => vec![
155 "function_item".to_string(),
156 "struct_item".to_string(),
157 "enum_item".to_string(),
158 "impl_item".to_string(),
159 "trait_item".to_string(),
160 "mod_item".to_string(),
161 "use_declaration".to_string(),
162 "const_item".to_string(),
163 "static_item".to_string(),
164 ],
165 Language::JavaScript => vec![
166 "function_declaration".to_string(),
167 "function_expression".to_string(),
168 "arrow_function".to_string(),
169 "class_declaration".to_string(),
170 "method_definition".to_string(),
171 "variable_declaration".to_string(),
172 "import_statement".to_string(),
173 "export_statement".to_string(),
174 ],
175 Language::TypeScript => vec![
176 "function_declaration".to_string(),
177 "function_expression".to_string(),
178 "arrow_function".to_string(),
179 "class_declaration".to_string(),
180 "interface_declaration".to_string(),
181 "type_alias_declaration".to_string(),
182 "method_definition".to_string(),
183 "variable_declaration".to_string(),
184 "import_statement".to_string(),
185 "export_statement".to_string(),
186 ],
187 Language::Java => vec![
188 "class_declaration".to_string(),
189 "interface_declaration".to_string(),
190 "method_declaration".to_string(),
191 "constructor_declaration".to_string(),
192 "field_declaration".to_string(),
193 "import_declaration".to_string(),
194 "package_declaration".to_string(),
195 ],
196 Language::C => vec![
197 "function_definition".to_string(),
198 "declaration".to_string(),
199 "struct_specifier".to_string(),
200 "union_specifier".to_string(),
201 "enum_specifier".to_string(),
202 "preproc_include".to_string(),
203 "preproc_define".to_string(),
204 ],
205 Language::Cpp => vec![
206 "function_definition".to_string(),
207 "declaration".to_string(),
208 "class_specifier".to_string(),
209 "struct_specifier".to_string(),
210 "union_specifier".to_string(),
211 "enum_specifier".to_string(),
212 "namespace_definition".to_string(),
213 "preproc_include".to_string(),
214 "preproc_define".to_string(),
215 ],
216 Language::Go => vec![
217 "function_declaration".to_string(),
218 "method_declaration".to_string(),
219 "type_declaration".to_string(),
220 "var_declaration".to_string(),
221 "const_declaration".to_string(),
222 "import_declaration".to_string(),
223 "package_clause".to_string(),
224 ],
225 _ => vec![], }
227}