1use std::cell::RefCell;
8use std::collections::HashMap;
9use std::rc::Rc;
10use tree_sitter::{Language, Node, Parser, Tree};
11
/// Source languages this module can hand to tree-sitter.
///
/// Every variant has a matching grammar in `TSLanguage::get_language`, a
/// set of accepted spellings in `TSLanguage::from_str`, and a display name in
/// `TSLanguage::name`. The type is `Copy` and hashable, so it can be passed by
/// value and used as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TSLanguage {
    /// Rust; `from_str` also maps the `"sigil"` alias to this variant.
    Rust,
    /// Python (`"python"`, `"py"`).
    Python,
    /// JavaScript (`"javascript"`, `"js"`).
    JavaScript,
    /// TypeScript, parsed with `tree_sitter_typescript::language_typescript`.
    TypeScript,
    /// TSX, parsed with `tree_sitter_typescript::language_tsx`.
    TypeScriptTsx,
    /// Go (`"go"`, `"golang"`).
    Go,
    /// C.
    C,
    /// C++ (`"cpp"`, `"c++"`, `"cxx"`).
    Cpp,
    /// Java.
    Java,
    /// JSON.
    Json,
    /// CSS.
    Css,
    /// Bash (`"bash"`, `"sh"`, `"shell"`).
    Bash,
}
28
29impl TSLanguage {
30 pub fn get_language(&self) -> Language {
32 match self {
33 TSLanguage::Rust => tree_sitter_rust::language(),
34 TSLanguage::Python => tree_sitter_python::language(),
35 TSLanguage::JavaScript => tree_sitter_javascript::language(),
36 TSLanguage::TypeScript => tree_sitter_typescript::language_typescript(),
37 TSLanguage::TypeScriptTsx => tree_sitter_typescript::language_tsx(),
38 TSLanguage::Go => tree_sitter_go::language(),
39 TSLanguage::C => tree_sitter_c::language(),
40 TSLanguage::Cpp => tree_sitter_cpp::language(),
41 TSLanguage::Java => tree_sitter_java::language(),
42 TSLanguage::Json => tree_sitter_json::language(),
43 TSLanguage::Css => tree_sitter_css::language(),
44 TSLanguage::Bash => tree_sitter_bash::language(),
45 }
46 }
47
48 pub fn from_str(s: &str) -> Option<Self> {
51 let name = s.rsplit("::").next().unwrap_or(s);
53
54 match name.to_lowercase().as_str() {
55 "rust" | "rs" | "sigil" => Some(TSLanguage::Rust),
56 "python" | "py" => Some(TSLanguage::Python),
57 "javascript" | "js" => Some(TSLanguage::JavaScript),
58 "typescript" | "ts" => Some(TSLanguage::TypeScript),
59 "tsx" | "typescripttsx" => Some(TSLanguage::TypeScriptTsx),
60 "go" | "golang" => Some(TSLanguage::Go),
61 "c" => Some(TSLanguage::C),
62 "cpp" | "c++" | "cxx" => Some(TSLanguage::Cpp),
63 "java" => Some(TSLanguage::Java),
64 "json" => Some(TSLanguage::Json),
65 "css" => Some(TSLanguage::Css),
66 "bash" | "sh" | "shell" => Some(TSLanguage::Bash),
67 "html" | "htm" => None, "kotlin" | "kt" => None,
70 "yaml" | "yml" => None,
71 "toml" => None,
72 "sql" => None,
73 "markdown" | "md" => None,
74 _ => None,
75 }
76 }
77
78 pub fn name(&self) -> &'static str {
80 match self {
81 TSLanguage::Rust => "Rust",
82 TSLanguage::Python => "Python",
83 TSLanguage::JavaScript => "JavaScript",
84 TSLanguage::TypeScript => "TypeScript",
85 TSLanguage::TypeScriptTsx => "TypeScriptTsx",
86 TSLanguage::Go => "Go",
87 TSLanguage::C => "C",
88 TSLanguage::Cpp => "Cpp",
89 TSLanguage::Java => "Java",
90 TSLanguage::Json => "Json",
91 TSLanguage::Css => "Css",
92 TSLanguage::Bash => "Bash",
93 }
94 }
95}
96
/// A tree-sitter `Parser` bundled with the `TSLanguage` it was configured for.
pub struct TSParser {
    /// Underlying tree-sitter parser; its grammar is set in `TSParser::new`.
    parser: Parser,
    /// Language given to `TSParser::new`; copied into each `TSTree` that
    /// `TSParser::parse` produces.
    language: TSLanguage,
}
102
103impl TSParser {
104 pub fn new(language: TSLanguage) -> Result<Self, String> {
106 let mut parser = Parser::new();
107 parser
108 .set_language(language.get_language())
109 .map_err(|e| format!("Failed to set language: {:?}", e))?;
110
111 Ok(TSParser { parser, language })
112 }
113
114 pub fn parse(&mut self, source: &str) -> Result<TSTree, String> {
116 self.parser
117 .parse(source, None)
118 .map(|tree| TSTree {
119 tree,
120 source: source.to_string(),
121 language: self.language,
122 })
123 .ok_or_else(|| "Failed to parse source code".to_string())
124 }
125
126 pub fn language(&self) -> TSLanguage {
128 self.language
129 }
130}
131
/// Result of a successful parse: the tree-sitter `Tree` plus the text and
/// language it was produced from.
pub struct TSTree {
    /// Syntax tree returned by the tree-sitter parser.
    tree: Tree,
    /// Owned copy of the source text that was parsed.
    source: String,
    /// Language of the parser that produced this tree.
    language: TSLanguage,
}
138
139impl TSTree {
140 pub fn root_node(&self) -> Node {
142 self.tree.root_node()
143 }
144
145 pub fn source(&self) -> &str {
147 &self.source
148 }
149
150 pub fn language(&self) -> TSLanguage {
152 self.language
153 }
154}
155
156pub fn parse_source(language_str: &str, source: &str) -> Result<TSTree, String> {
158 let language = TSLanguage::from_str(language_str)
159 .ok_or_else(|| format!("Unsupported language: {}", language_str))?;
160
161 let mut parser = TSParser::new(language)?;
162 parser.parse(source)
163}
164
165pub fn node_to_value(node: &Node) -> HashMap<String, crate::interpreter::Value> {
168 use crate::interpreter::Value;
169
170 let mut fields = HashMap::new();
171
172 fields.insert(
174 "kind".to_string(),
175 Value::String(Rc::new(node.kind().to_string())),
176 );
177 fields.insert("is_named".to_string(), Value::Bool(node.is_named()));
178 fields.insert("is_error".to_string(), Value::Bool(node.is_error()));
179 fields.insert("is_missing".to_string(), Value::Bool(node.is_missing()));
180
181 let start = node.start_position();
183 let end = node.end_position();
184
185 let mut start_fields = HashMap::new();
186 start_fields.insert("row".to_string(), Value::Int(start.row as i64));
187 start_fields.insert("column".to_string(), Value::Int(start.column as i64));
188 fields.insert(
189 "start".to_string(),
190 Value::Struct {
191 name: "Position".to_string(),
192 fields: Rc::new(RefCell::new(start_fields)),
193 },
194 );
195
196 let mut end_fields = HashMap::new();
197 end_fields.insert("row".to_string(), Value::Int(end.row as i64));
198 end_fields.insert("column".to_string(), Value::Int(end.column as i64));
199 fields.insert(
200 "end".to_string(),
201 Value::Struct {
202 name: "Position".to_string(),
203 fields: Rc::new(RefCell::new(end_fields)),
204 },
205 );
206
207 fields.insert(
209 "start_byte".to_string(),
210 Value::Int(node.start_byte() as i64),
211 );
212 fields.insert("end_byte".to_string(), Value::Int(node.end_byte() as i64));
213
214 fields.insert(
216 "child_count".to_string(),
217 Value::Int(node.child_count() as i64),
218 );
219 fields.insert(
220 "named_child_count".to_string(),
221 Value::Int(node.named_child_count() as i64),
222 );
223
224 let children: Vec<Value> = (0..node.child_count())
226 .filter_map(|i| node.child(i))
227 .map(|child| Value::Struct {
228 name: "SyntaxNode".to_string(),
229 fields: Rc::new(RefCell::new(node_to_value(&child))),
230 })
231 .collect();
232 fields.insert(
233 "children".to_string(),
234 Value::Array(Rc::new(RefCell::new(children))),
235 );
236
237 let named_children: Vec<Value> = (0..node.named_child_count())
239 .filter_map(|i| node.named_child(i))
240 .map(|child| Value::Struct {
241 name: "SyntaxNode".to_string(),
242 fields: Rc::new(RefCell::new(node_to_value(&child))),
243 })
244 .collect();
245 fields.insert(
246 "named_children".to_string(),
247 Value::Array(Rc::new(RefCell::new(named_children))),
248 );
249
250 fields
251}
252
253pub fn node_text<'a>(node: &Node, source: &'a str) -> &'a str {
255 &source[node.start_byte()..node.end_byte()]
256}
257
/// Returns the display names of all supported languages.
///
/// The names are the same strings `TSLanguage::name` returns, listed in the
/// order the variants are declared. A new `Vec` is allocated on every call.
pub fn supported_languages() -> Vec<&'static str> {
    const NAMES: [&str; 12] = [
        "Rust",
        "Python",
        "JavaScript",
        "TypeScript",
        "TypeScriptTsx",
        "Go",
        "C",
        "Cpp",
        "Java",
        "Json",
        "Css",
        "Bash",
    ];

    NAMES.to_vec()
}
275
#[cfg(test)]
mod tests {
    use super::*;

    /// A small Rust program parses to a non-empty `source_file` root.
    #[test]
    fn test_parse_rust() {
        let rust_src = r#"
fn main() {
 println!("Hello, world!");
}
"#;
        let parsed = parse_source("rust", rust_src).unwrap();
        let top = parsed.root_node();
        assert_eq!(top.kind(), "source_file");
        assert!(top.child_count() > 0);
    }

    /// A small Python script parses to a `module` root.
    #[test]
    fn test_parse_python() {
        let python_src = r#"
def greet(name):
 print(f"Hello, {name}!")

greet("world")
"#;
        let parsed = parse_source("python", python_src).unwrap();
        assert_eq!(parsed.root_node().kind(), "module");
    }

    /// A small JavaScript script parses to a `program` root.
    #[test]
    fn test_parse_javascript() {
        let js_src = r#"
function greet(name) {
 console.log(`Hello, ${name}!`);
}
greet("world");
"#;
        let parsed = parse_source("javascript", js_src).unwrap();
        assert_eq!(parsed.root_node().kind(), "program");
    }

    /// Names, aliases, mixed case and `::`-qualified spellings all resolve;
    /// an unknown name does not.
    #[test]
    fn test_language_from_str() {
        let cases = [
            ("rust", Some(TSLanguage::Rust)),
            ("Rust", Some(TSLanguage::Rust)),
            ("Language::Rust", Some(TSLanguage::Rust)),
            ("python", Some(TSLanguage::Python)),
            ("py", Some(TSLanguage::Python)),
            ("unknown", None),
        ];

        for &(input, expected) in cases.iter() {
            assert_eq!(TSLanguage::from_str(input), expected);
        }
    }

    /// Recognised names without a grammar resolve to `None`.
    #[test]
    fn test_unsupported_languages() {
        for unsupported in ["kotlin", "yaml", "toml"].iter() {
            assert!(TSLanguage::from_str(unsupported).is_none());
        }
    }
}