use crate::common::safe_slice;
use crate::graph::canonical_fqn::FqnBuilder;
use crate::ingest::{ScopeSeparator, ScopeStack, SymbolFact, SymbolKind};
use crate::references::{CallFact, ReferenceFact};
use anyhow::Result;
use std::path::PathBuf;
/// Extracts symbols, references, and calls from JavaScript source using a
/// tree-sitter parser.
pub struct JavaScriptParser {
/// Underlying tree-sitter parser used for every `parse` call made by the
/// `&mut self` extraction methods.
pub(crate) parser: tree_sitter::Parser,
}
impl JavaScriptParser {
/// Creates a parser configured with the tree-sitter JavaScript grammar.
///
/// # Errors
///
/// Returns an error if the tree-sitter parser rejects the JavaScript language.
pub fn new() -> Result<Self> {
    let language = tree_sitter_javascript::language();
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&language)?;
    Ok(Self { parser })
}
/// Wraps an already-constructed tree-sitter parser.
///
/// This function does not set a language on `parser`.
/// NOTE(review): presumably the caller has already configured it for JavaScript — confirm.
pub(crate) fn from_parser(parser: tree_sitter::Parser) -> Self {
Self { parser }
}
/// Parses `source` and returns a fact for every function, method, and class
/// declaration found in it.
///
/// Returns an empty vector when the parser produces no tree. Scopes are
/// joined with `ScopeSeparator::Dot` and the package name is fixed to `"."`.
pub fn extract_symbols(&mut self, file_path: PathBuf, source: &[u8]) -> Vec<SymbolFact> {
    let mut facts = Vec::new();
    if let Some(tree) = self.parser.parse(source, None) {
        let mut scope_stack = ScopeStack::new(ScopeSeparator::Dot);
        self.walk_tree_with_scope(
            &tree.root_node(),
            source,
            &file_path,
            &mut facts,
            &mut scope_stack,
            ".",
        );
    }
    facts
}
/// Recursively visits `node` and its descendants in document order, appending
/// to `facts` every symbol that `extract_symbol_with_fqn` recognizes.
///
/// A `class_declaration` whose name can be extracted pushes that name onto
/// `scope_stack` while its children are visited and pops it afterwards.
/// Every other node (including `export_statement`) is simply descended into.
fn walk_tree_with_scope(
    &self,
    node: &tree_sitter::Node,
    source: &[u8],
    file_path: &PathBuf,
    facts: &mut Vec<SymbolFact>,
    scope_stack: &mut ScopeStack,
    package_name: &str,
) {
    // Only a named class opens a new scope for its children.
    let class_scope = if node.kind() == "class_declaration" {
        self.extract_name(node, source)
    } else {
        None
    };

    // The node's own fact is built before its scope is pushed, so a class is
    // qualified by its enclosing scopes rather than by itself.
    let own_fact =
        self.extract_symbol_with_fqn(node, source, file_path, scope_stack, package_name);
    facts.extend(own_fact);

    if let Some(name) = &class_scope {
        scope_stack.push(name);
    }

    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        self.walk_tree_with_scope(&child, source, file_path, facts, scope_stack, package_name);
    }

    if class_scope.is_some() {
        scope_stack.pop();
    }
}
/// Builds a `SymbolFact` for `node` when it is a function declaration, method
/// definition, or class declaration with an extractable name.
///
/// Returns `None` for any other node kind or when no name is found. Lines are
/// reported 1-based (tree-sitter row + 1); columns are passed through as-is.
fn extract_symbol_with_fqn(
    &self,
    node: &tree_sitter::Node,
    source: &[u8],
    file_path: &PathBuf,
    scope_stack: &ScopeStack,
    package_name: &str,
) -> Option<SymbolFact> {
    let symbol_kind = match node.kind() {
        "class_declaration" => SymbolKind::Class,
        "method_definition" => SymbolKind::Method,
        "function_declaration" => SymbolKind::Function,
        _ => return None,
    };
    let name = self.extract_name(node, source)?;

    let fqn_builder = FqnBuilder::new(
        package_name.to_string(),
        file_path.to_string_lossy().into_owned(),
        ScopeSeparator::Dot,
    );
    let canonical_fqn = fqn_builder.canonical(scope_stack, symbol_kind.clone(), &name);
    let display_fqn = fqn_builder.display(scope_stack, symbol_kind.clone(), &name);
    let fqn = scope_stack.fqn_for_symbol(&name);
    let kind_normalized = symbol_kind.normalized_key().to_string();

    let start = node.start_position();
    let end = node.end_position();

    Some(SymbolFact {
        file_path: file_path.clone(),
        kind: symbol_kind,
        kind_normalized,
        name: Some(name),
        fqn: Some(fqn),
        canonical_fqn: Some(canonical_fqn),
        display_fqn: Some(display_fqn),
        byte_start: node.start_byte(),
        byte_end: node.end_byte(),
        start_line: start.row + 1,
        start_col: start.column,
        end_line: end.row + 1,
        end_col: end.column,
    })
}
/// Returns the text of the first direct child of `node` whose kind is
/// `identifier` or `property_identifier`.
///
/// Returns `None` when there is no such child, when its byte range cannot be
/// sliced from `source`, or when the bytes are not valid UTF-8.
fn extract_name(&self, node: &tree_sitter::Node, source: &[u8]) -> Option<String> {
    let mut cursor = node.walk();
    let name_node = node
        .children(&mut cursor)
        .find(|child| matches!(child.kind(), "identifier" | "property_identifier"))?;
    let bytes = safe_slice(source, name_node.start_byte(), name_node.end_byte())?;
    std::str::from_utf8(bytes).ok().map(str::to_owned)
}
/// Same extraction as `extract_symbols`, but driven by a caller-supplied
/// tree-sitter parser instead of one owned by a `JavaScriptParser`.
///
/// Returns an empty vector when `parser` produces no tree.
pub fn extract_symbols_with_parser(
    parser: &mut tree_sitter::Parser,
    file_path: PathBuf,
    source: &[u8],
) -> Vec<SymbolFact> {
    let mut facts = Vec::new();
    if let Some(tree) = parser.parse(source, None) {
        let mut scope_stack = ScopeStack::new(ScopeSeparator::Dot);
        Self::walk_tree_with_scope_static(
            &tree.root_node(),
            source,
            &file_path,
            &mut facts,
            &mut scope_stack,
            ".",
        );
    }
    facts
}
/// Receiver-less counterpart of `walk_tree_with_scope`: visits `node` and its
/// descendants in document order and appends each recognized symbol to `facts`.
///
/// A named `class_declaration` is pushed onto `scope_stack` for the duration
/// of its children and popped afterwards; all other nodes are just descended.
fn walk_tree_with_scope_static(
    node: &tree_sitter::Node,
    source: &[u8],
    file_path: &PathBuf,
    facts: &mut Vec<SymbolFact>,
    scope_stack: &mut ScopeStack,
    package_name: &str,
) {
    // Only a class with an extractable name introduces a scope.
    let opened_scope = match node.kind() {
        "class_declaration" => Self::extract_name_static(node, source),
        _ => None,
    };

    // Record the node itself before entering its scope.
    if let Some(fact) =
        Self::extract_symbol_with_fqn_static(node, source, file_path, scope_stack, package_name)
    {
        facts.push(fact);
    }

    if let Some(name) = &opened_scope {
        scope_stack.push(name);
    }

    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        Self::walk_tree_with_scope_static(
            &child,
            source,
            file_path,
            facts,
            scope_stack,
            package_name,
        );
    }

    if opened_scope.is_some() {
        scope_stack.pop();
    }
}
/// Receiver-less counterpart of `extract_symbol_with_fqn`.
///
/// Produces a `SymbolFact` for function declarations, method definitions, and
/// class declarations that have an extractable name; returns `None` otherwise.
/// Lines are 1-based (tree-sitter row + 1); columns are passed through as-is.
fn extract_symbol_with_fqn_static(
    node: &tree_sitter::Node,
    source: &[u8],
    file_path: &PathBuf,
    scope_stack: &ScopeStack,
    package_name: &str,
) -> Option<SymbolFact> {
    let symbol_kind = match node.kind() {
        "class_declaration" => SymbolKind::Class,
        "method_definition" => SymbolKind::Method,
        "function_declaration" => SymbolKind::Function,
        _ => return None,
    };
    let name = Self::extract_name_static(node, source)?;

    let fqn_builder = FqnBuilder::new(
        package_name.to_string(),
        file_path.to_string_lossy().into_owned(),
        ScopeSeparator::Dot,
    );
    let canonical_fqn = fqn_builder.canonical(scope_stack, symbol_kind.clone(), &name);
    let display_fqn = fqn_builder.display(scope_stack, symbol_kind.clone(), &name);
    let fqn = scope_stack.fqn_for_symbol(&name);
    let kind_normalized = symbol_kind.normalized_key().to_string();

    let (start, end) = (node.start_position(), node.end_position());

    Some(SymbolFact {
        file_path: file_path.clone(),
        kind: symbol_kind,
        kind_normalized,
        name: Some(name),
        fqn: Some(fqn),
        canonical_fqn: Some(canonical_fqn),
        display_fqn: Some(display_fqn),
        byte_start: node.start_byte(),
        byte_end: node.end_byte(),
        start_line: start.row + 1,
        start_col: start.column,
        end_line: end.row + 1,
        end_col: end.column,
    })
}
/// Receiver-less counterpart of `extract_name`: returns the text of the first
/// direct child that is an `identifier` or `property_identifier`.
///
/// Returns `None` when no such child exists, when its byte range cannot be
/// sliced from `source`, or when the bytes are not valid UTF-8.
fn extract_name_static(node: &tree_sitter::Node, source: &[u8]) -> Option<String> {
    let mut cursor = node.walk();
    let name_node = node
        .children(&mut cursor)
        .find(|child| matches!(child.kind(), "identifier" | "property_identifier"))?;
    let bytes = safe_slice(source, name_node.start_byte(), name_node.end_byte())?;
    std::str::from_utf8(bytes).ok().map(str::to_owned)
}
/// Parses `source` and returns every identifier occurrence that
/// `extract_reference` accepts as a reference to one of `symbols`.
///
/// Returns an empty vector when the parser produces no tree.
pub fn extract_references(
    &mut self,
    file_path: PathBuf,
    source: &[u8],
    symbols: &[SymbolFact],
) -> Vec<ReferenceFact> {
    let mut references = Vec::new();
    if let Some(tree) = self.parser.parse(source, None) {
        self.walk_tree_for_references(
            &tree.root_node(),
            source,
            &file_path,
            symbols,
            &mut references,
        );
    }
    references
}
/// Visits `node` and then each of its children recursively, appending any
/// reference produced by `extract_reference` to `references`.
fn walk_tree_for_references(
    &self,
    node: &tree_sitter::Node,
    source: &[u8],
    file_path: &PathBuf,
    symbols: &[SymbolFact],
    references: &mut Vec<ReferenceFact>,
) {
    // `Option` is iterable, so this pushes zero or one fact.
    references.extend(self.extract_reference(node, source, file_path, symbols));

    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        self.walk_tree_for_references(&child, source, file_path, symbols, references);
    }
}
/// Builds a `ReferenceFact` when `node` is an `identifier` that names one of
/// `symbols`.
///
/// Returns `None` when:
/// - `node` is not an `identifier`;
/// - the node's byte range is out of bounds for `source` or is not valid UTF-8;
/// - no symbol in `symbols` has that name;
/// - the identifier starts before the end of the first same-named symbol's
///   span (this excludes the definition itself and anything inside or before it).
///
/// Lines are 1-based (tree-sitter row + 1); columns are passed through as-is.
fn extract_reference(
    &self,
    node: &tree_sitter::Node,
    source: &[u8],
    file_path: &PathBuf,
    symbols: &[SymbolFact],
) -> Option<ReferenceFact> {
    if node.kind() != "identifier" {
        return None;
    }

    // Checked slicing: a range that does not fit `source` yields `None`
    // instead of panicking, matching the checked slicing used by the other
    // extractors in this file.
    let text_bytes = source.get(node.start_byte()..node.end_byte())?;
    let text = std::str::from_utf8(text_bytes).ok()?;

    let referenced_symbol = symbols
        .iter()
        .find(|symbol| symbol.name.as_deref() == Some(text))?;

    let ref_start = node.start_byte();
    if ref_start < referenced_symbol.byte_end {
        return None;
    }

    Some(ReferenceFact {
        file_path: file_path.clone(),
        referenced_symbol: text.to_string(),
        byte_start: ref_start,
        byte_end: node.end_byte(),
        start_line: node.start_position().row + 1,
        start_col: node.start_position().column,
        end_line: node.end_position().row + 1,
        end_col: node.end_position().column,
    })
}
/// Parses `source` and returns the calls found inside function declarations
/// whose callee name matches one of `symbols`.
///
/// Returns an empty vector when the parser produces no tree. Symbols are
/// indexed by name; when several share a name, the last one in `symbols` wins.
pub fn extract_calls(
    &mut self,
    file_path: PathBuf,
    source: &[u8],
    symbols: &[SymbolFact],
) -> Vec<CallFact> {
    let tree = match self.parser.parse(source, None) {
        Some(tree) => tree,
        None => return Vec::new(),
    };

    let mut symbol_map: std::collections::HashMap<String, &SymbolFact> =
        std::collections::HashMap::new();
    for symbol in symbols {
        if let Some(name) = &symbol.name {
            symbol_map.insert(name.clone(), symbol);
        }
    }

    // Forwarded to `walk_tree_for_calls`, which currently ignores it.
    let mut functions: Vec<&SymbolFact> = Vec::new();
    for symbol in symbols {
        if symbol.kind == SymbolKind::Function {
            functions.push(symbol);
        }
    }

    let mut calls = Vec::new();
    self.walk_tree_for_calls(
        &tree.root_node(),
        source,
        &file_path,
        &symbol_map,
        &functions,
        &mut calls,
    );
    calls
}
/// Entry point for call collection: starts the caller-tracking walk at `node`
/// with no enclosing caller.
///
/// `_functions` is accepted but not used by this function.
fn walk_tree_for_calls(
&self,
node: &tree_sitter::Node,
source: &[u8],
file_path: &PathBuf,
symbol_map: &std::collections::HashMap<String, &SymbolFact>,
_functions: &[&SymbolFact],
calls: &mut Vec<CallFact>,
) {
self.walk_tree_for_calls_with_caller(node, source, file_path, symbol_map, None, calls);
}
/// Recursively walks `node`, tracking the enclosing function as the current
/// caller and recording each `call_expression` found while a caller is known.
///
/// A `function_declaration` / `function_definition` node replaces the caller
/// with the symbol registered under its name in `symbol_map`, or with `None`
/// if the name cannot be extracted or is not in the map. Every other node kind
/// inherits `current_caller`.
fn walk_tree_for_calls_with_caller(
    &self,
    node: &tree_sitter::Node,
    source: &[u8],
    file_path: &PathBuf,
    symbol_map: &std::collections::HashMap<String, &SymbolFact>,
    current_caller: Option<&SymbolFact>,
    calls: &mut Vec<CallFact>,
) {
    // NOTE(review): `function_definition` may never be emitted by the
    // JavaScript grammar — confirm before relying on this arm.
    let caller: Option<&SymbolFact> = match node.kind() {
        "function_declaration" | "function_definition" => self
            .extract_function_name(node, source)
            .and_then(|name| symbol_map.get(&name).copied()),
        _ => current_caller,
    };

    // Calls outside any known function have no caller and are not recorded.
    if let ("call_expression", Some(caller_fact)) = (node.kind(), caller) {
        self.extract_calls_in_node(node, source, file_path, caller_fact, symbol_map, calls);
    }

    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        self.walk_tree_for_calls_with_caller(
            &child, source, file_path, symbol_map, caller, calls,
        );
    }
}
/// Returns the text of the first direct `identifier` child of `node`.
///
/// Returns `None` when there is no such child, when its byte range cannot be
/// sliced from `source`, or when the bytes are not valid UTF-8.
fn extract_function_name(&self, node: &tree_sitter::Node, source: &[u8]) -> Option<String> {
    let mut cursor = node.walk();
    let ident = node
        .children(&mut cursor)
        .find(|child| child.kind() == "identifier")?;
    let bytes = safe_slice(source, ident.start_byte(), ident.end_byte())?;
    std::str::from_utf8(bytes).ok().map(str::to_owned)
}
/// Appends a `CallFact` to `calls` when `node` is a `call_expression` whose
/// callee name is a key of `symbol_map`.
///
/// The caller is recorded by name (empty string if `caller` has no name);
/// both symbol-id fields are left as `None`. Lines are 1-based
/// (tree-sitter row + 1); columns are passed through as-is.
fn extract_calls_in_node(
    &self,
    node: &tree_sitter::Node,
    source: &[u8],
    file_path: &PathBuf,
    caller: &SymbolFact,
    symbol_map: &std::collections::HashMap<String, &SymbolFact>,
    calls: &mut Vec<CallFact>,
) {
    if node.kind() != "call_expression" {
        return;
    }
    let callee = match self.extract_callee_from_call(node, source) {
        Some(name) => name,
        None => return,
    };
    // Only calls to symbols known for this file are recorded.
    if !symbol_map.contains_key(&callee) {
        return;
    }

    let start = node.start_position();
    let end = node.end_position();
    calls.push(CallFact {
        file_path: file_path.clone(),
        caller: caller.name.clone().unwrap_or_default(),
        callee,
        caller_symbol_id: None,
        callee_symbol_id: None,
        byte_start: node.start_byte(),
        byte_end: node.end_byte(),
        start_line: start.row + 1,
        start_col: start.column,
        end_line: end.row + 1,
        end_col: end.column,
    });
}
/// Returns the callee name of a call node.
///
/// Looks at the first direct child that is either an `identifier` (its text is
/// the name) or a `member_expression` (delegated to
/// `extract_member_expression_name`). Returns `None` when neither is present
/// or when the name cannot be read from `source`.
fn extract_callee_from_call(&self, node: &tree_sitter::Node, source: &[u8]) -> Option<String> {
    let mut cursor = node.walk();
    let target = node
        .children(&mut cursor)
        .find(|child| matches!(child.kind(), "identifier" | "member_expression"))?;

    if target.kind() == "member_expression" {
        return self.extract_member_expression_name(&target, source);
    }

    let bytes = safe_slice(source, target.start_byte(), target.end_byte())?;
    std::str::from_utf8(bytes).ok().map(str::to_owned)
}
/// Returns the property name of a `member_expression`, e.g. `method` for
/// `obj.method`.
///
/// The property is looked up through the grammar's `property` field rather
/// than by child position: the children of `obj.method` are the object, the
/// `.` (or optional-chain) token, and then the property, so a fixed index of
/// 1 lands on the punctuation and never on the name.
///
/// Returns `None` when the node has no `property` field, when the property is
/// not a `property_identifier` (for example a private `#name`), when its byte
/// range cannot be sliced from `source`, or when the bytes are not valid UTF-8.
fn extract_member_expression_name(
    &self,
    node: &tree_sitter::Node,
    source: &[u8],
) -> Option<String> {
    let prop = node.child_by_field_name("property")?;
    if prop.kind() != "property_identifier" {
        return None;
    }
    let name_bytes = safe_slice(source, prop.start_byte(), prop.end_byte())?;
    std::str::from_utf8(name_bytes).ok().map(|s| s.to_string())
}
}
impl Default for JavaScriptParser {
/// Builds a parser through `JavaScriptParser::new`.
///
/// # Panics
///
/// Panics with "Failed to create JavaScript parser" if `new` returns an error.
fn default() -> Self {
Self::new().expect("Failed to create JavaScript parser") }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` with a fresh parser and returns the symbols extracted
    /// for the file named `path`.
    fn symbols_for(path: &str, source: &[u8]) -> Vec<SymbolFact> {
        let mut parser = JavaScriptParser::new().unwrap();
        parser.extract_symbols(PathBuf::from(path), source)
    }

    /// Returns the facts whose kind equals `kind`, in extraction order.
    fn of_kind(facts: &[SymbolFact], kind: SymbolKind) -> Vec<&SymbolFact> {
        facts.iter().filter(|fact| fact.kind == kind).collect()
    }

    #[test]
    fn test_extract_function() {
        let facts = symbols_for("test.js", b"function foo() {\n return;\n}\n");
        assert_eq!(facts.len(), 1);
        assert_eq!(facts[0].name.as_deref(), Some("foo"));
        assert_eq!(facts[0].kind, SymbolKind::Function);
    }

    #[test]
    fn test_extract_class() {
        let facts = symbols_for("test.js", b"class MyClass {\n constructor() {}\n}\n");
        assert!(!facts.is_empty());
        let classes = of_kind(&facts, SymbolKind::Class);
        assert_eq!(classes.len(), 1);
        assert_eq!(classes[0].name.as_deref(), Some("MyClass"));
    }

    #[test]
    fn test_extract_method() {
        let facts = symbols_for(
            "test.js",
            b"class MyClass {\n myMethod() {\n return;\n }\n}\n",
        );
        assert!(facts.len() >= 2);
        let methods = of_kind(&facts, SymbolKind::Method);
        assert_eq!(methods.len(), 1);
        assert_eq!(methods[0].name.as_deref(), Some("myMethod"));
    }

    #[test]
    fn test_extract_export_function() {
        let facts = symbols_for("test.js", b"export function foo() {\n return;\n}\n");
        assert_eq!(facts.len(), 1);
        assert_eq!(facts[0].name.as_deref(), Some("foo"));
        assert_eq!(facts[0].kind, SymbolKind::Function);
    }

    #[test]
    fn test_extract_export_class() {
        let facts = symbols_for("test.js", b"export class MyClass {}\n");
        assert_eq!(facts.len(), 1);
        assert_eq!(facts[0].name.as_deref(), Some("MyClass"));
        assert_eq!(facts[0].kind, SymbolKind::Class);
    }

    #[test]
    fn test_extract_export_default() {
        let facts = symbols_for("test.js", b"export default class Foo {}\n");
        assert_eq!(facts.len(), 1);
        assert_eq!(facts[0].name.as_deref(), Some("Foo"));
    }

    #[test]
    fn test_extract_multiple_symbols() {
        let source = b"
function foo() {}
class Bar {
method1() {}
method2() {}
}
export function baz() {}
";
        let facts = symbols_for("test.js", source);
        assert!(facts.len() >= 5);
        assert_eq!(of_kind(&facts, SymbolKind::Function).len(), 2);
        assert_eq!(of_kind(&facts, SymbolKind::Class).len(), 1);
        assert_eq!(of_kind(&facts, SymbolKind::Method).len(), 2);
    }

    #[test]
    fn test_empty_file() {
        let facts = symbols_for("empty.js", b"");
        assert_eq!(facts.len(), 0);
    }

    // Despite the name, this only bounds the number of symbols produced for
    // malformed input; it does not require the result to be empty.
    #[test]
    fn test_syntax_error_returns_empty() {
        let facts = symbols_for("broken.js", b"function broken(\n // invalid js");
        assert!(
            facts.len() < 10,
            "Syntax error should not produce many symbols"
        );
    }

    #[test]
    fn test_byte_spans_within_bounds() {
        let source = b"function foo() {}";
        let facts = symbols_for("test.js", source);
        assert_eq!(facts.len(), 1);
        let fact = &facts[0];
        assert!(fact.byte_start < fact.byte_end);
        assert!(fact.byte_end <= source.len());
    }

    #[test]
    fn test_line_column_positions() {
        let facts = symbols_for("test.js", b"function foo() {\n return;\n}\n");
        assert_eq!(facts.len(), 1);
        let fact = &facts[0];
        assert_eq!(fact.start_line, 1);
        assert_eq!(fact.start_col, 0);
    }

    #[test]
    fn test_fqn_class_method() {
        let source = b"
class MyClass {
myMethod() {
return;
}
}
";
        let facts = symbols_for("test.js", source);

        let classes = of_kind(&facts, SymbolKind::Class);
        assert_eq!(classes.len(), 1);
        assert_eq!(classes[0].fqn.as_deref(), Some("MyClass"));

        let methods = of_kind(&facts, SymbolKind::Method);
        assert_eq!(methods.len(), 1);
        assert_eq!(methods[0].fqn.as_deref(), Some("MyClass.myMethod"));
    }

    #[test]
    fn test_canonical_fqn_format() {
        let facts = symbols_for("src/test.js", b"function foo() {\n return;\n}\n");
        assert_eq!(facts.len(), 1);
        let fact = &facts[0];
        assert!(fact.canonical_fqn.is_some());
        let canonical = fact.canonical_fqn.as_ref().unwrap();
        assert!(canonical.contains(".::src/test.js::Function foo"));
    }

    #[test]
    fn test_display_fqn_format() {
        let facts = symbols_for("test.js", b"function foo() {\n return;\n}\n");
        assert_eq!(facts.len(), 1);
        let fact = &facts[0];
        assert!(fact.display_fqn.is_some());
        let display = fact.display_fqn.as_ref().unwrap();
        assert_eq!(display, "..foo");
    }

    #[test]
    fn test_fqn_class_method_with_fqn_builder() {
        let source = b"
class MyClass {
myMethod() {
return;
}
}
";
        let facts = symbols_for("src/test.js", source);

        let classes = of_kind(&facts, SymbolKind::Class);
        assert_eq!(classes.len(), 1);
        let class_fact = &classes[0];
        assert!(class_fact.canonical_fqn.is_some());
        assert!(class_fact
            .canonical_fqn
            .as_ref()
            .unwrap()
            .contains(".::src/test.js::Struct MyClass"));
        assert_eq!(class_fact.display_fqn.as_ref().unwrap(), "..MyClass");

        let methods = of_kind(&facts, SymbolKind::Method);
        assert_eq!(methods.len(), 1);
        let method_fact = &methods[0];
        assert!(method_fact.canonical_fqn.is_some());
        assert!(method_fact
            .canonical_fqn
            .as_ref()
            .unwrap()
            .contains(".::src/test.js::Method myMethod"));
        assert_eq!(
            method_fact.display_fqn.as_ref().unwrap(),
            "..MyClass.myMethod"
        );
    }

    #[test]
    fn test_fqn_export_function() {
        let facts = symbols_for("src/test.js", b"export function foo() {\n return;\n}\n");
        assert_eq!(facts.len(), 1);
        let fact = &facts[0];
        assert!(fact.canonical_fqn.is_some());
        assert!(fact.display_fqn.is_some());
        assert!(fact
            .canonical_fqn
            .as_ref()
            .unwrap()
            .contains(".::src/test.js::Function foo"));
        assert_eq!(fact.display_fqn.as_ref().unwrap(), "..foo");
    }
}