1use anyhow::{Result, anyhow};
4use std::collections::HashMap;
5use std::fs;
6use std::path::{Path, PathBuf};
7use streaming_iterator::StreamingIterator;
8use tree_sitter::{Language, Node, Parser, Query, QueryCursor};
9
/// A named span of source code located by a tree-sitter query.
///
/// The same shape is used both for symbol definitions and for reference
/// sites (calls, imports, ...) that mention a symbol.
#[derive(Clone, Debug)]
pub struct Definition {
    /// Identifier text of the symbol.
    pub name: String,
    /// Byte offset in the file content at which the matched node starts.
    pub start_byte: usize,
    /// Byte offset in the file content at which the matched node ends.
    pub end_byte: usize,
    /// Source text associated with the match.
    pub source: String,
    /// File the span was found in, when known.
    pub file_path: Option<PathBuf>,
    /// One-based line on which the span starts, when known.
    pub line_number: Option<usize>,
}
20
21#[derive(Debug, Clone)]
23pub struct Context {
24 pub definitions: Vec<Definition>,
25 pub references: Vec<Definition>,
26}
27
28pub struct CodeParser {
30 pub files: HashMap<PathBuf, String>,
31 pub parser: Parser,
32}
33
34impl CodeParser {
35 pub fn new() -> Result<Self> {
37 Ok(Self {
38 files: HashMap::new(),
39 parser: Parser::new(),
40 })
41 }
42
43 pub fn add_file(&mut self, path: &Path) -> Result<()> {
45 let content = fs::read_to_string(path)
46 .map_err(|e| anyhow!("Failed to read file: {}: {}", path.display(), e))?;
47 self.files.insert(path.to_path_buf(), content);
48 Ok(())
49 }
50
51 #[must_use]
53 pub fn get_language(&self, path: &Path) -> Option<Language> {
54 let extension = path.extension().and_then(|ext| ext.to_str());
55 match extension {
56 Some("c") | Some("h") => Some(tree_sitter_c::LANGUAGE.into()),
57 Some("cpp") | Some("cxx") | Some("cc") | Some("hpp") | Some("hxx") => {
58 Some(tree_sitter_cpp::LANGUAGE.into())
59 }
60 Some("py") => Some(tree_sitter_python::LANGUAGE.into()),
61 Some("js") => Some(tree_sitter_javascript::LANGUAGE.into()),
62 Some("ts") => Some(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
63 Some("tsx") => Some(tree_sitter_typescript::LANGUAGE_TSX.into()),
64 Some("java") => Some(tree_sitter_java::LANGUAGE.into()),
65 Some("rs") => Some(tree_sitter_rust::LANGUAGE.into()),
66 Some("go") => Some(tree_sitter_go::LANGUAGE.into()),
67 Some("rb") => Some(tree_sitter_ruby::LANGUAGE.into()),
68 Some("tf") | Some("hcl") => Some(tree_sitter_hcl::LANGUAGE.into()),
69 Some("php") | Some("php3") | Some("php4") | Some("php5") | Some("phtml") => {
70 Some(tree_sitter_php::LANGUAGE_PHP.into())
71 }
72 _ => None,
73 }
74 }
75
76 fn language_to_name(language: &Language) -> Option<&'static str> {
78 let ts_c: Language = tree_sitter_c::LANGUAGE.into();
79 let ts_cpp: Language = tree_sitter_cpp::LANGUAGE.into();
80 let ts_python: Language = tree_sitter_python::LANGUAGE.into();
81 let ts_javascript: Language = tree_sitter_javascript::LANGUAGE.into();
82 let ts_typescript: Language = tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into();
83 let ts_tsx: Language = tree_sitter_typescript::LANGUAGE_TSX.into();
84 let ts_java: Language = tree_sitter_java::LANGUAGE.into();
85 let ts_go: Language = tree_sitter_go::LANGUAGE.into();
86 let ts_rust: Language = tree_sitter_rust::LANGUAGE.into();
87 let ts_ruby: Language = tree_sitter_ruby::LANGUAGE.into();
88 let ts_hcl: Language = tree_sitter_hcl::LANGUAGE.into();
89 let ts_php: Language = tree_sitter_php::LANGUAGE_PHP.into();
90
91 if language == &ts_c {
92 Some("c")
93 } else if language == &ts_cpp {
94 Some("cpp")
95 } else if language == &ts_python {
96 Some("python")
97 } else if language == &ts_javascript {
98 Some("javascript")
99 } else if language == &ts_typescript || language == &ts_tsx {
100 Some("typescript")
101 } else if language == &ts_java {
102 Some("java")
103 } else if language == &ts_go {
104 Some("go")
105 } else if language == &ts_rust {
106 Some("rust")
107 } else if language == &ts_ruby {
108 Some("ruby")
109 } else if language == &ts_hcl {
110 Some("terraform")
111 } else if language == &ts_php {
112 Some("php")
113 } else {
114 None
115 }
116 }
117
118 pub fn get_query_content(&self, language: &Language, query_name: &str) -> Result<&'static str> {
120 let lang_name = Self::language_to_name(language)
121 .ok_or_else(|| anyhow!("Unsupported language for queries"))?;
122
123 if query_name.contains('/') || query_name.contains('\\') || query_name.contains("..") {
124 return Err(anyhow!("Invalid query name: {}", query_name));
125 }
126
127 let query_content = match (lang_name, query_name) {
128 ("c", "definitions") => include_str!("queries/c/definitions.scm"),
129 ("c", "calls") => include_str!("queries/c/calls.scm"),
130 ("cpp", "definitions") => include_str!("queries/cpp/definitions.scm"),
131 ("cpp", "calls") => include_str!("queries/cpp/calls.scm"),
132 ("python", "definitions") => include_str!("queries/python/definitions.scm"),
133 ("python", "calls") => include_str!("queries/python/calls.scm"),
134 ("javascript", "definitions") => include_str!("queries/javascript/definitions.scm"),
135 ("javascript", "calls") => include_str!("queries/javascript/calls.scm"),
136 ("typescript", "definitions") => include_str!("queries/typescript/definitions.scm"),
137 ("typescript", "calls") => include_str!("queries/typescript/calls.scm"),
138 ("java", "definitions") => include_str!("queries/java/definitions.scm"),
139 ("java", "calls") => include_str!("queries/java/calls.scm"),
140 ("go", "definitions") => include_str!("queries/go/definitions.scm"),
141 ("go", "calls") => include_str!("queries/go/calls.scm"),
142 ("rust", "definitions") => include_str!("queries/rust/definitions.scm"),
143 ("rust", "calls") => include_str!("queries/rust/calls.scm"),
144 ("ruby", "definitions") => include_str!("queries/ruby/definitions.scm"),
145 ("ruby", "calls") => include_str!("queries/ruby/calls.scm"),
146 ("terraform", "definitions") => include_str!("queries/terraform/definitions.scm"),
147 ("terraform", "calls") => include_str!("queries/terraform/calls.scm"),
148 ("php", "definitions") => include_str!("queries/php/definitions.scm"),
149 ("php", "calls") => include_str!("queries/php/calls.scm"),
150 (_, query) => return Err(anyhow!("Unsupported query: {} for {}", query, lang_name)),
151 };
152
153 Ok(query_content)
154 }
155
156 pub fn find_definition(
158 &mut self,
159 name: &str,
160 source_file: &Path,
161 ) -> Result<Option<(PathBuf, Definition)>> {
162 let content = self
163 .files
164 .get(source_file)
165 .ok_or_else(|| anyhow!("File not found in parser: {}", source_file.display()))?;
166
167 let language = match self.get_language(source_file) {
168 Some(lang) => lang,
169 None => return Ok(None),
170 };
171
172 self.parser
173 .set_language(&language)
174 .map_err(|e| anyhow!("Failed to set language: {}", e))?;
175
176 let tree = self
177 .parser
178 .parse(content, None)
179 .ok_or_else(|| anyhow!("Failed to parse file: {}", source_file.display()))?;
180
181 let query_str = self.get_query_content(&language, "definitions")?;
182
183 let query = Query::new(&language, query_str)
184 .map_err(|e| anyhow!("Failed to create query: {}", e))?;
185
186 let mut query_cursor = QueryCursor::new();
187 let mut matches = query_cursor.matches(&query, tree.root_node(), content.as_bytes());
188
189 while let Some(mat) = matches.next() {
190 let mut definition_node: Option<Node> = None;
191 let mut name_node: Option<Node> = None;
192
193 for cap in mat.captures {
194 let capture_name = &query.capture_names()[cap.index as usize];
195 match capture_name {
196 s if *s == "definition" => definition_node = Some(cap.node),
197 s if *s == "name" => name_node = Some(cap.node),
198 _ => {}
199 }
200 }
201
202 if let (Some(def_node), Some(name_node_inner)) = (definition_node, name_node)
203 && name_node_inner.utf8_text(content.as_bytes())? == name
204 {
205 let start_byte = def_node.start_byte();
206 let end_byte = def_node.end_byte();
207 let source = def_node.utf8_text(content.as_bytes())?.to_string();
208
209 let line_number = content[..start_byte].matches('\n').count() + 1;
210 let definition = Definition {
211 name: name.to_string(),
212 start_byte,
213 end_byte,
214 source,
215 file_path: Some(source_file.to_path_buf()),
216 line_number: Some(line_number),
217 };
218 return Ok(Some((source_file.to_path_buf(), definition)));
219 }
220 }
221
222 Ok(None)
223 }
224
225 pub fn find_calls(&mut self, name: &str) -> Result<Vec<(PathBuf, Definition, String)>> {
227 let mut results = Vec::new();
228
229 for (file_path, content) in &self.files {
230 let language = match self.get_language(file_path) {
231 Some(lang) => lang,
232 None => continue,
233 };
234
235 self.parser.set_language(&language).map_err(|e| {
236 anyhow!("Failed to set language for {}: {}", file_path.display(), e)
237 })?;
238
239 let tree = match self.parser.parse(content, None) {
240 Some(t) => t,
241 None => {
242 eprintln!("Warning: Failed to parse file: {}", file_path.display());
243 continue;
244 }
245 };
246
247 let query_str = match self.get_query_content(&language, "calls") {
248 Ok(s) => s,
249 Err(e) => {
250 eprintln!(
251 "Warning: Failed to get calls query for {}: {}",
252 file_path.display(),
253 e
254 );
255 continue;
256 }
257 };
258
259 let query = match Query::new(&language, query_str) {
260 Ok(q) => q,
261 Err(e) => {
262 eprintln!("Warning: Failed to create calls query: {}", e);
263 continue;
264 }
265 };
266
267 let mut query_cursor = QueryCursor::new();
268 let mut matches = query_cursor.matches(&query, tree.root_node(), content.as_bytes());
269
270 while let Some(mat) = matches.next() {
271 for cap in mat.captures {
272 let capture_name = query.capture_names()[cap.index as usize];
273 let valid_captures = [
274 "direct_call",
275 "method_call",
276 "macro_call",
277 "reference",
278 "callback",
279 "import",
280 "assignment",
281 ];
282
283 if valid_captures.contains(&capture_name) {
284 let node = cap.node;
285 if node.utf8_text(content.as_bytes())? == name {
286 let start_byte = node.start_byte();
287 let end_byte = node.end_byte();
288 let source = name.to_string();
289 let line_number = content[..start_byte].matches('\n').count() + 1;
290
291 results.push((
292 file_path.clone(),
293 Definition {
294 name: name.to_string(),
295 start_byte,
296 end_byte,
297 source,
298 file_path: Some(file_path.clone()),
299 line_number: Some(line_number),
300 },
301 capture_name.to_string(),
302 ));
303 }
304 }
305 }
306 }
307 }
308
309 Ok(results)
310 }
311
312 pub fn find_bidirectional(
314 &mut self,
315 name: &str,
316 source_file: &Path,
317 ) -> Result<Vec<(PathBuf, Definition)>> {
318 let mut results = Vec::new();
319
320 if let Some(definition) = self.find_definition(name, source_file)? {
321 results.push(definition);
322 }
323
324 let calls = self.find_calls(name)?;
325 results.extend(calls.into_iter().map(|(path, def, _)| (path, def)));
326
327 results.sort_by_key(|(path, def)| (path.clone(), def.start_byte));
328 results.dedup_by_key(|(path, def)| (path.clone(), def.start_byte));
329
330 Ok(results)
331 }
332
333 pub fn build_context_from_file(&mut self, start_path: &Path) -> Result<Context> {
335 use std::collections::HashSet;
336
337 let mut collected: HashSet<String> = HashSet::new();
338 let mut definitions: Vec<Definition> = Vec::new();
339 let mut references: Vec<Definition> = Vec::new();
340
341 let file_content = self
342 .files
343 .get(start_path)
344 .ok_or_else(|| anyhow!("File not found: {}", start_path.display()))?;
345
346 let language = match self.get_language(start_path) {
347 Some(lang) => lang,
348 None => {
349 return Ok(Context {
350 definitions: Vec::new(),
351 references: Vec::new(),
352 });
353 }
354 };
355
356 self.parser
357 .set_language(&language)
358 .map_err(|e| anyhow!("Failed to set language: {}", e))?;
359
360 let tree = self
361 .parser
362 .parse(file_content, None)
363 .ok_or_else(|| anyhow!("Failed to parse: {}", start_path.display()))?;
364
365 let definitions_query_str = self.get_query_content(&language, "definitions")?;
366 let definitions_query = Query::new(&language, definitions_query_str)?;
367
368 let mut query_cursor = QueryCursor::new();
369 let mut matches = query_cursor.matches(
370 &definitions_query,
371 tree.root_node(),
372 file_content.as_bytes(),
373 );
374
375 let mut to_visit: Vec<(PathBuf, String)> = Vec::new();
376
377 while let Some(mat) = matches.next() {
378 let mut def_node: Option<Node> = None;
379 let mut name_node: Option<Node> = None;
380 for cap in mat.captures {
381 let capture_name = &definitions_query.capture_names()[cap.index as usize];
382 match &capture_name[..] {
383 "definition" => def_node = Some(cap.node),
384 "name" => name_node = Some(cap.node),
385 _ => {}
386 }
387 }
388 if let (Some(def_node), Some(name_node)) = (def_node, name_node) {
389 let name = name_node.utf8_text(file_content.as_bytes())?.to_string();
390 if !collected.contains(&name) {
391 let start_byte = def_node.start_byte();
392 let end_byte = def_node.end_byte();
393 let source = def_node.utf8_text(file_content.as_bytes())?.to_string();
394 let line_number = file_content[..start_byte].matches('\n').count() + 1;
395 definitions.push(Definition {
396 name: name.clone(),
397 start_byte,
398 end_byte,
399 source,
400 file_path: Some(start_path.to_path_buf()),
401 line_number: Some(line_number),
402 });
403 collected.insert(name.clone());
404 to_visit.push((start_path.to_path_buf(), name));
405 }
406 }
407 }
408
409 let references_query_str = match self.get_query_content(&language, "calls") {
410 Ok(s) => s,
411 Err(_) => {
412 return Ok(Context {
413 definitions,
414 references,
415 });
416 }
417 };
418
419 let references_query = match Query::new(&language, references_query_str) {
420 Ok(q) => q,
421 Err(_) => {
422 return Ok(Context {
423 definitions,
424 references,
425 });
426 }
427 };
428
429 let mut references_cursor = QueryCursor::new();
430 let mut ref_matches =
431 references_cursor.matches(&references_query, tree.root_node(), file_content.as_bytes());
432
433 while let Some(mat) = ref_matches.next() {
434 for cap in mat.captures {
435 let capture_name = &references_query.capture_names()[cap.index as usize];
436 if [
437 "direct_call",
438 "method_call",
439 "macro_call",
440 "reference",
441 "callback",
442 "import",
443 "assignment",
444 ]
445 .contains(capture_name)
446 {
447 let node = cap.node;
448 let name = node.utf8_text(file_content.as_bytes())?.to_string();
449 let start_byte = node.start_byte();
450 let end_byte = node.end_byte();
451 let source = node.utf8_text(file_content.as_bytes())?.to_string();
452 let line_number = file_content[..start_byte].matches('\n').count() + 1;
453
454 references.push(Definition {
455 name,
456 start_byte,
457 end_byte,
458 source,
459 file_path: Some(start_path.to_path_buf()),
460 line_number: Some(line_number),
461 });
462 }
463 }
464 }
465
466 while let Some((file_path, func_name)) = to_visit.pop() {
467 if let Some((_, def)) = self.find_definition(&func_name, &file_path)? {
468 let refs = self.find_calls(&def.name)?;
469 for (ref_file, ref_def, _) in refs {
470 if !collected.contains(&ref_def.name) {
471 definitions.push(ref_def.clone());
472 collected.insert(ref_def.name.clone());
473 to_visit.push((ref_file, ref_def.name.clone()));
474 }
475 }
476 }
477 }
478
479 Ok(Context {
480 definitions,
481 references,
482 })
483 }
484}
485
486impl Default for CodeParser {
487 fn default() -> Self {
488 Self {
489 files: HashMap::new(),
490 parser: Parser::new(),
491 }
492 }
493}