1pub mod parser;
8
9use anyhow::Result;
10use std::path::Path;
11
12use tree_sitter_bash;
14use tree_sitter_css;
15use tree_sitter_go;
16use tree_sitter_html;
17use tree_sitter_java;
18use tree_sitter_kotlin_ng;
19use tree_sitter_python;
20use tree_sitter_rust;
21use tree_sitter_typescript;
22
23use crate::lsp::symbols::SymbolInfo;
24pub use parser::has_syntax_errors;
25pub use parser::DocstringInfo;
26
27pub fn extract_symbols(path: &Path) -> Result<Vec<SymbolInfo>> {
32 let source = std::fs::read_to_string(path)?;
33 let language = detect_language(path);
34 parser::extract_symbols_from_source(&source, language, path)
35}
36
37pub fn extract_symbols_from_text(text: &str, path: &Path) -> Result<Vec<SymbolInfo>> {
42 let language = detect_language(path);
43 parser::extract_symbols_from_source(text, language, path)
44}
45
46pub fn extract_docstrings(path: &Path) -> Result<Vec<DocstringInfo>> {
48 let source = std::fs::read_to_string(path)?;
49 let language = detect_language(path);
50 parser::extract_docstrings_from_source(&source, language, path)
51}
52
/// Maps a file path to a language identifier based on its extension.
///
/// The extension is compared ASCII case-insensitively, so `main.rs` and
/// `MAIN.RS` both yield `Some("rust")`. This matches `get_ts_language`, which
/// also lowercases its input before matching.
///
/// Returns `None` when the path has no extension, when the extension is not
/// valid UTF-8, or when the extension is not listed below.
///
/// Not every identifier returned here has a grammar in `get_ts_language`;
/// callers must be prepared for that lookup to return `None`.
pub fn detect_language(path: &Path) -> Option<&'static str> {
    // Normalize once so that files such as `Index.HTML` or `README.MD`
    // (common on case-insensitive file systems) are still recognized.
    // NOTE(review): this also maps an upper-case `.C` to "c"; some Unix
    // projects use `.C` for C++ — confirm whether that needs special handling.
    let ext = path.extension()?.to_str()?.to_ascii_lowercase();

    let language = match ext.as_str() {
        "rs" => "rust",
        "py" => "python",
        "ts" => "typescript",
        "tsx" => "tsx",
        "js" | "cjs" | "mjs" => "javascript",
        "jsx" => "jsx",
        "go" => "go",
        "java" => "java",
        "kt" | "kts" => "kotlin",
        "c" => "c",
        "cpp" | "cc" | "cxx" => "cpp",
        "cs" => "csharp",
        "rb" => "ruby",
        "html" | "htm" => "html",
        "css" => "css",
        "scss" => "scss",
        "less" => "less",
        "php" => "php",
        "swift" => "swift",
        "scala" => "scala",
        "ex" | "exs" => "elixir",
        "hs" => "haskell",
        "lua" => "lua",
        "sh" | "bash" => "bash",
        "md" | "markdown" => "markdown",
        _ => return None,
    };
    Some(language)
}
91
92pub(crate) fn get_ts_language(lang: &str) -> Option<tree_sitter::Language> {
101 match lang.to_ascii_lowercase().as_str() {
102 "rust" => Some(tree_sitter_rust::LANGUAGE.into()),
103 "python" => Some(tree_sitter_python::LANGUAGE.into()),
104 "go" => Some(tree_sitter_go::LANGUAGE.into()),
105 "typescript" | "javascript" => Some(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
106 "tsx" | "jsx" => Some(tree_sitter_typescript::LANGUAGE_TSX.into()),
107 "java" => Some(tree_sitter_java::LANGUAGE.into()),
108 "kotlin" => Some(tree_sitter_kotlin_ng::LANGUAGE.into()),
109 "html" => Some(tree_sitter_html::LANGUAGE.into()),
110 "css" | "scss" | "less" => Some(tree_sitter_css::LANGUAGE.into()),
111 "bash" => Some(tree_sitter_bash::LANGUAGE.into()),
112 _ => None,
113 }
114}
115
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Pins the relationship between `detect_language` and `get_ts_language`:
    /// which identifiers are produced for sample file names, which of those
    /// identifiers have a tree-sitter grammar, and which do not.
    #[test]
    fn detect_language_vs_get_ts_language_contract() {
        // (sample file name, identifier `detect_language` must report)
        let detected_samples: &[(&str, &str)] = &[
            ("a.rs", "rust"),
            ("a.py", "python"),
            ("a.ts", "typescript"),
            ("a.tsx", "tsx"),
            ("a.js", "javascript"),
            ("a.jsx", "jsx"),
            ("a.go", "go"),
            ("a.java", "java"),
            ("a.kt", "kotlin"),
            ("a.c", "c"),
            ("a.cpp", "cpp"),
            ("a.cs", "csharp"),
            ("a.rb", "ruby"),
            ("a.html", "html"),
            ("a.css", "css"),
            ("a.scss", "scss"),
            ("a.less", "less"),
            ("a.php", "php"),
            ("a.swift", "swift"),
            ("a.scala", "scala"),
            ("a.ex", "elixir"),
            ("a.hs", "haskell"),
            ("a.lua", "lua"),
            ("a.sh", "bash"),
            ("a.md", "markdown"),
        ];
        for &(path, expected_lang) in detected_samples {
            assert_eq!(
                detect_language(Path::new(path)),
                Some(expected_lang),
                "detect_language({path}) should return Some({expected_lang})"
            );
        }

        // Identifiers that must resolve to a tree-sitter grammar.
        let with_ast: &[&str] = &[
            "rust",
            "python",
            "typescript",
            "javascript",
            "tsx",
            "jsx",
            "go",
            "java",
            "kotlin",
            "html",
            "css",
            "scss",
            "less",
            "bash",
        ];
        for lang in with_ast.iter().copied() {
            assert!(
                get_ts_language(lang).is_some(),
                "expected AST support for {lang}"
            );
        }

        // Identifiers that are detected but have no grammar yet. When a grammar
        // is added for one of them, this assertion fails as a reminder to move
        // the identifier into `with_ast`.
        let without_ast: &[&str] = &[
            "c", "cpp", "csharp", "ruby", "php", "swift", "scala", "elixir", "haskell", "lua",
            "markdown",
        ];
        for lang in without_ast.iter().copied() {
            assert!(
                get_ts_language(lang).is_none(),
                "{lang} unexpectedly has AST support — move it to with_ast"
            );
        }
    }
}