lynx_parser/symbol_extraction/
javascript.rs1use anyhow::Result;
2use lynx_protocol::{CodeChunk, SymbolRecord};
3use std::path::Path;
4use tree_sitter::{Parser, Query, QueryCursor, StreamingIterator};
5use tree_sitter_javascript::LANGUAGE;
6
7pub fn extract(path: &Path, content: &str) -> Result<(Vec<CodeChunk>, Vec<SymbolRecord>)> {
8 let mut parser = Parser::new();
9 parser.set_language(&LANGUAGE.into())?;
10
11 let tree = parser
12 .parse(content, None)
13 .ok_or_else(|| anyhow::anyhow!("Failed to parse JavaScript file"))?;
14 let root_node = tree.root_node();
15
16 let mut chunks = Vec::new();
17 let mut symbols = Vec::new();
18
19 let query_str = r#"
20 (function_declaration name: (identifier) @func_name) @func
21 (class_declaration name: (identifier) @class_name) @class
22 (method_definition name: (property_identifier) @method_name) @method
23 "#;
24
25 let query = Query::new(&LANGUAGE.into(), query_str)?;
26 let mut cursor = QueryCursor::new();
27 let mut captures = cursor.captures(&query, root_node, content.as_bytes());
28
29 while let Some(&(ref mat, capture_index)) = captures.next() {
30 let capture = mat.captures[capture_index];
31 let capture_name = query.capture_names()[capture.index as usize];
32
33 if !["func", "class", "method"].contains(&capture_name) {
34 continue;
35 }
36
37 let node = capture.node;
38 let start_line = node.start_position().row + 1;
39 let end_line = node.end_position().row + 1;
40 let raw_content = node.utf8_text(content.as_bytes())?.to_string();
41
42 let symbol_name = match resolve_symbol_name(mat, node, &query, content.as_bytes()) {
43 Some(name) => name,
44 None => continue,
45 };
46
47 let file_path = path.to_string_lossy().replace('\\', "/");
48 let symbol_id = format!("{}:{}:{}", capture_name, file_path, symbol_name);
49
50 symbols.push(SymbolRecord {
51 symbol_id: symbol_id.clone(),
52 symbol_name: symbol_name.clone(),
53 file_path: file_path.clone(),
54 start_line,
55 end_line,
56 });
57
58 chunks.push(CodeChunk {
59 id: blake3::hash(raw_content.as_bytes()).to_string(),
60 file_path: file_path.clone(),
61 start_line,
62 end_line,
63 raw_content,
64 symbols_defined: vec![symbol_id],
65 });
66 }
67
68 Ok((chunks, symbols))
69}
70
71fn resolve_symbol_name(
72 mat: &tree_sitter::QueryMatch,
73 node: tree_sitter::Node,
74 query: &Query,
75 content: &[u8],
76) -> Option<String> {
77 if let Some(capture) = mat.captures.iter().find(|c| {
78 let name = query.capture_names()[c.index as usize];
79 name.ends_with("_name")
80 }) {
81 if let Ok(text) = capture.node.utf8_text(content) {
82 return Some(text.to_string());
83 }
84 }
85
86 if let Some(name_node) = node
87 .child_by_field_name("name")
88 .or_else(|| node.child_by_field_name("type"))
89 {
90 if let Ok(text) = name_node.utf8_text(content) {
91 return Some(text.to_string());
92 }
93 }
94
95 find_identifier_in_node(node, content)
96}
97
98fn find_identifier_in_node(node: tree_sitter::Node, content: &[u8]) -> Option<String> {
99 let mut cursor = node.walk();
100 for child in node.named_children(&mut cursor) {
101 if matches!(
102 child.kind(),
103 "identifier" | "type_identifier" | "field_identifier" | "property_identifier"
104 ) {
105 if let Ok(text) = child.utf8_text(content) {
106 return Some(text.to_string());
107 }
108 }
109 if let Some(name) = find_identifier_in_node(child, content) {
110 return Some(name);
111 }
112 }
113 None
114}