Skip to main content

lynx_parser/symbol_extraction/
javascript.rs

1use anyhow::Result;
2use lynx_protocol::{CodeChunk, SymbolRecord};
3use std::path::Path;
4use tree_sitter::{Parser, Query, QueryCursor, StreamingIterator};
5use tree_sitter_javascript::LANGUAGE;
6
7pub fn extract(path: &Path, content: &str) -> Result<(Vec<CodeChunk>, Vec<SymbolRecord>)> {
8    let mut parser = Parser::new();
9    parser.set_language(&LANGUAGE.into())?;
10
11    let tree = parser
12        .parse(content, None)
13        .ok_or_else(|| anyhow::anyhow!("Failed to parse JavaScript file"))?;
14    let root_node = tree.root_node();
15
16    let mut chunks = Vec::new();
17    let mut symbols = Vec::new();
18
19    let query_str = r#"
20        (function_declaration name: (identifier) @func_name) @func
21        (class_declaration name: (identifier) @class_name) @class
22        (method_definition name: (property_identifier) @method_name) @method
23    "#;
24
25    let query = Query::new(&LANGUAGE.into(), query_str)?;
26    let mut cursor = QueryCursor::new();
27    let mut captures = cursor.captures(&query, root_node, content.as_bytes());
28
29    while let Some(&(ref mat, capture_index)) = captures.next() {
30        let capture = mat.captures[capture_index];
31        let capture_name = query.capture_names()[capture.index as usize];
32
33        if !["func", "class", "method"].contains(&capture_name) {
34            continue;
35        }
36
37        let node = capture.node;
38        let start_line = node.start_position().row + 1;
39        let end_line = node.end_position().row + 1;
40        let raw_content = node.utf8_text(content.as_bytes())?.to_string();
41
42        let symbol_name = match resolve_symbol_name(mat, node, &query, content.as_bytes()) {
43            Some(name) => name,
44            None => continue,
45        };
46
47        let file_path = path.to_string_lossy().replace('\\', "/");
48        let symbol_id = format!("{}:{}:{}", capture_name, file_path, symbol_name);
49
50        symbols.push(SymbolRecord {
51            symbol_id: symbol_id.clone(),
52            symbol_name: symbol_name.clone(),
53            file_path: file_path.clone(),
54            start_line,
55            end_line,
56        });
57
58        chunks.push(CodeChunk {
59            id: blake3::hash(raw_content.as_bytes()).to_string(),
60            file_path: file_path.clone(),
61            start_line,
62            end_line,
63            raw_content,
64            symbols_defined: vec![symbol_id],
65        });
66    }
67
68    Ok((chunks, symbols))
69}
70
71fn resolve_symbol_name(
72    mat: &tree_sitter::QueryMatch,
73    node: tree_sitter::Node,
74    query: &Query,
75    content: &[u8],
76) -> Option<String> {
77    if let Some(capture) = mat.captures.iter().find(|c| {
78        let name = query.capture_names()[c.index as usize];
79        name.ends_with("_name")
80    }) {
81        if let Ok(text) = capture.node.utf8_text(content) {
82            return Some(text.to_string());
83        }
84    }
85
86    if let Some(name_node) = node
87        .child_by_field_name("name")
88        .or_else(|| node.child_by_field_name("type"))
89    {
90        if let Ok(text) = name_node.utf8_text(content) {
91            return Some(text.to_string());
92        }
93    }
94
95    find_identifier_in_node(node, content)
96}
97
98fn find_identifier_in_node(node: tree_sitter::Node, content: &[u8]) -> Option<String> {
99    let mut cursor = node.walk();
100    for child in node.named_children(&mut cursor) {
101        if matches!(
102            child.kind(),
103            "identifier" | "type_identifier" | "field_identifier" | "property_identifier"
104        ) {
105            if let Ok(text) = child.utf8_text(content) {
106                return Some(text.to_string());
107            }
108        }
109        if let Some(name) = find_identifier_in_node(child, content) {
110            return Some(name);
111        }
112    }
113    None
114}