use tree_sitter::{Node, Tree, TreeCursor};
use crate::types::{ChunkError, ChunkMetadata, FileSymbols, LineIndex, OutlineUnit};
/// Builds the [`ChunkMetadata`] describing the byte range `chunk_start..chunk_end`
/// of an already-parsed file.
///
/// The metadata is anchored on the smallest syntax node that fully contains the
/// requested range: its kind and name, the name of its nearest significant
/// ancestor, and its scope path from the root. Definitions and references are
/// collected from every node that touches the range.
///
/// # Errors
///
/// The current implementation always returns `Ok`; the `Result` is part of the
/// public signature only.
pub fn extract_metadata_from_tree(
    tree: &Tree,
    content: &str,
    chunk_start: usize,
    chunk_end: usize,
    language_name: &str,
) -> Result<ChunkMetadata, ChunkError> {
    let root = tree.root_node();
    let (definitions, references) = extract_symbols_in_range(root, content, chunk_start, chunk_end);
    let anchor = find_primary_node_for_range(root, chunk_start, chunk_end);

    Ok(ChunkMetadata {
        node_type: anchor.kind().to_string(),
        node_name: extract_node_name(&anchor, content),
        language: language_name.to_string(),
        parent_context: extract_parent_context(&anchor, content),
        scope_path: build_scope_path(&anchor, content),
        definitions,
        references,
    })
}
pub fn extract_file_symbols(tree: &Tree, content: &str) -> FileSymbols {
let root_node = tree.root_node();
let mut cursor = root_node.walk();
let line_index = LineIndex::new(content);
let mut outline = Vec::new();
extract_outline_units(&mut cursor, content, &line_index, &mut outline);
outline.sort_by(|a, b| {
a.start_byte
.cmp(&b.start_byte)
.then_with(|| a.end_byte.cmp(&b.end_byte))
.then_with(|| a.kind.cmp(&b.kind))
.then_with(|| a.name.as_deref().unwrap_or("").cmp(b.name.as_deref().unwrap_or("")))
});
let (definitions, references) = extract_symbols_in_range(root_node, content, 0, content.len());
FileSymbols {
outline,
definitions,
references,
}
}
/// Returns the smallest node in the subtree rooted at `node` that fully
/// contains `start_byte..end_byte`.
///
/// `node` itself is the fallback when no descendant is strictly smaller while
/// still containing the range.
fn find_primary_node_for_range(node: Node, start_byte: usize, end_byte: usize) -> Node {
    let mut smallest_enclosing = node;
    let mut walker = node.walk();
    visit_node(&mut walker, &mut smallest_enclosing, start_byte, end_byte);
    smallest_enclosing
}
/// Recursive step of `find_primary_node_for_range`.
///
/// Examines the node under `cursor`; if it contains `start_byte..end_byte` and
/// spans strictly fewer bytes than `best_node`, it becomes the new `best_node`.
/// Children are only visited when the current node contains the range. The
/// cursor is positioned back on the node it started at before returning.
fn visit_node<'a>(cursor: &mut TreeCursor<'a>, best_node: &mut Node<'a>, start_byte: usize, end_byte: usize) {
    let candidate = cursor.node();

    let encloses_range = candidate.start_byte() <= start_byte && candidate.end_byte() >= end_byte;
    if !encloses_range {
        return;
    }

    // Strict comparison: on equal spans the previously found node is kept.
    if candidate.byte_range().len() < best_node.byte_range().len() {
        *best_node = candidate;
    }

    let mut has_child = cursor.goto_first_child();
    if !has_child {
        return;
    }
    while has_child {
        visit_node(cursor, best_node, start_byte, end_byte);
        has_child = cursor.goto_next_sibling();
    }
    cursor.goto_parent();
}
/// Walks the subtree under `cursor` in pre-order and appends one
/// [`OutlineUnit`] to `outline` for every node whose kind is a significant
/// scope.
///
/// Nested scopes are recorded as well: the walk always continues into the
/// children, whether or not the current node was recorded. The cursor is back
/// on its starting node when the function returns.
fn extract_outline_units(
    cursor: &mut TreeCursor,
    content: &str,
    line_index: &LineIndex,
    outline: &mut Vec<OutlineUnit>,
) {
    let current = cursor.node();
    let kind = current.kind();

    if is_significant_scope(kind) {
        let start_byte = current.start_byte();
        let end_byte = current.end_byte();
        let (start_line, end_line) = line_index.line_numbers(start_byte, end_byte);
        outline.push(OutlineUnit {
            kind: kind.to_string(),
            name: extract_node_name(&current, content),
            start_byte,
            end_byte,
            start_line,
            end_line,
        });
    }

    let mut has_child = cursor.goto_first_child();
    if !has_child {
        return;
    }
    while has_child {
        extract_outline_units(cursor, content, line_index, outline);
        has_child = cursor.goto_next_sibling();
    }
    cursor.goto_parent();
}
/// Returns the declared name of `node`, if its kind is one of the recognised
/// definition kinds and a suitable name child exists.
///
/// Each recognised kind maps to an ordered list of child kinds; the first
/// direct child matching the earliest entry in the list supplies the name.
/// `impl_item` is special-cased: it uses a direct `type_identifier` child, or
/// else the `type_identifier` inside a `generic_type` child.
///
/// If the chosen child's text is not valid UTF-8 the name is the empty string.
/// Unrecognised kinds, and recognised kinds without a matching child, give
/// `None`.
fn extract_node_name(node: &Node, content: &str) -> Option<String> {
    /// Text covered by `n`, or the empty string if it cannot be decoded.
    fn node_text(n: &Node, content: &str) -> String {
        n.utf8_text(content.as_bytes()).unwrap_or("").to_string()
    }

    // Child kinds that may carry the name, in lookup order.
    let name_child_kinds: &[&str] = match node.kind() {
        "function_declaration"
        | "function_definition"
        | "method_definition"
        | "function_item"
        | "function"
        | "method_declaration" => &["identifier", "property_identifier"],

        "class_declaration"
        | "class_definition"
        | "class"
        | "interface_declaration"
        | "trait_item"
        | "trait_definition"
        | "enum_declaration"
        | "enum_specifier"
        | "enum_item"
        | "const_item"
        | "static_item"
        | "mod_item"
        | "object_definition"
        | "object_declaration" => &["identifier", "type_identifier"],

        "struct_item" | "struct_declaration" | "struct_specifier" => &["type_identifier", "identifier"],

        "module" | "module_definition" => &["identifier", "module_name"],

        "constructor_declaration"
        | "property_declaration"
        | "def"
        | "defn"
        | "defp"
        | "defmodule"
        | "defprotocol"
        | "namespace_declaration"
        | "namespace_definition" => &["identifier"],

        "impl_item" => {
            return find_child_by_kind(node, "type_identifier")
                .map(|type_id| node_text(&type_id, content))
                .or_else(|| {
                    find_child_by_kind(node, "generic_type").and_then(|generic| {
                        find_child_by_kind(&generic, "type_identifier")
                            .map(|type_id| node_text(&type_id, content))
                    })
                });
        }

        _ => return None,
    };

    name_child_kinds
        .iter()
        .find_map(|&kind| find_child_by_kind(node, kind))
        .map(|name_node| node_text(&name_node, content))
}
/// Returns the first direct child of `node` whose kind equals `kind`, or
/// `None` when no direct child matches. Only direct children are inspected.
fn find_child_by_kind<'a>(node: &'a Node, kind: &str) -> Option<Node<'a>> {
    let mut walker = node.walk();
    for child in node.children(&mut walker) {
        if child.kind() == kind {
            return Some(child);
        }
    }
    None
}
/// Builds the scope path of `node`, ordered from the outermost ancestor down
/// to `node` itself.
///
/// Every node on the parent chain (including `node`) contributes one segment
/// when it has a name; an unnamed node contributes its kind instead, but only
/// if that kind is a significant scope. All other nodes are skipped.
fn build_scope_path(node: &Node, content: &str) -> Vec<String> {
    let mut segments: Vec<String> = std::iter::successors(Some(*node), |current| current.parent())
        .filter_map(|ancestor| {
            extract_node_name(&ancestor, content).or_else(|| {
                is_significant_scope(ancestor.kind()).then(|| ancestor.kind().to_string())
            })
        })
        .collect();

    // Segments were gathered innermost-first; callers get outermost-first.
    segments.reverse();
    segments
}
/// Reports whether `kind` names a syntax node kind that is treated as a
/// significant scope (function, type, module, namespace and similar
/// definitions). The comparison is an exact, case-sensitive string match.
fn is_significant_scope(kind: &str) -> bool {
    const SIGNIFICANT_SCOPE_KINDS: &[&str] = &[
        // Functions and methods
        "function_declaration",
        "function_definition",
        "method_definition",
        "function_item",
        "function",
        "method_declaration",
        // Classes, interfaces and traits
        "class_declaration",
        "class_definition",
        "class",
        "interface_declaration",
        "trait_item",
        "trait_definition",
        // Enums and structs
        "enum_declaration",
        "enum_specifier",
        "struct_item",
        "struct_declaration",
        "struct_specifier",
        // Members
        "constructor_declaration",
        "property_declaration",
        // Modules
        "module",
        "module_definition",
        // Rust items
        "impl_item",
        "enum_item",
        "const_item",
        "static_item",
        "mod_item",
        // def-style forms
        "def",
        "defn",
        "defp",
        "defmodule",
        "defprotocol",
        "defimpl",
        // Objects and namespaces
        "object_definition",
        "object_declaration",
        "namespace",
        "namespace_declaration",
        "namespace_definition",
    ];

    SIGNIFICANT_SCOPE_KINDS.contains(&kind)
}
fn extract_parent_context(node: &Node, content: &str) -> Option<String> {
let mut parent = node.parent();
while let Some(p) = parent {
if is_significant_scope(p.kind()) {
return extract_node_name(&p, content);
}
parent = p.parent();
}
None
}
/// Collects the symbols defined and referenced by nodes under `root` that fall
/// within `start_byte..end_byte`.
///
/// Returns `(definitions, references)`; each list is sorted and free of
/// duplicates.
fn extract_symbols_in_range(
    root: Node,
    content: &str,
    start_byte: usize,
    end_byte: usize,
) -> (Vec<String>, Vec<String>) {
    let mut definitions = Vec::new();
    let mut references = Vec::new();

    let mut walker = root.walk();
    extract_symbols_recursive(
        &mut walker,
        content,
        start_byte,
        end_byte,
        &mut definitions,
        &mut references,
    );

    // Sorting first makes equal entries adjacent so `dedup` removes them all.
    for symbols in [&mut definitions, &mut references] {
        symbols.sort();
        symbols.dedup();
    }

    (definitions, references)
}
fn extract_symbols_recursive(
cursor: &mut TreeCursor,
content: &str,
start_byte: usize,
end_byte: usize,
definitions: &mut Vec<String>,
references: &mut Vec<String>,
) {
let node = cursor.node();
if node.end_byte() < start_byte || node.start_byte() > end_byte {
return;
}
match node.kind() {
"variable_declarator" | "parameter" | "identifier" if is_definition_context(&node) => {
if let Ok(text) = node.utf8_text(content.as_bytes()) {
definitions.push(text.to_string());
}
}
"function_declaration"
| "function_definition"
| "method_definition"
| "function_item"
| "function"
| "method_declaration" => {
if let Some(name_node) = find_child_by_kind(&node, "identifier")
&& let Ok(text) = name_node.utf8_text(content.as_bytes())
{
definitions.push(text.to_string());
}
}
"class_declaration"
| "class_definition"
| "struct_item"
| "interface_declaration"
| "struct_declaration"
| "struct_specifier"
| "trait_definition" => {
if let Some(name_node) =
find_child_by_kind(&node, "identifier").or_else(|| find_child_by_kind(&node, "type_identifier"))
&& let Ok(text) = name_node.utf8_text(content.as_bytes())
{
definitions.push(text.to_string());
}
}
"enum_declaration" | "enum_specifier" | "enum_item" => {
if let Some(name_node) =
find_child_by_kind(&node, "identifier").or_else(|| find_child_by_kind(&node, "type_identifier"))
&& let Ok(text) = name_node.utf8_text(content.as_bytes())
{
definitions.push(text.to_string());
}
}
"constructor_declaration" | "property_declaration" => {
if let Some(name_node) = find_child_by_kind(&node, "identifier")
&& let Ok(text) = name_node.utf8_text(content.as_bytes())
{
definitions.push(text.to_string());
}
}
"module" | "module_definition" | "mod_item" => {
if let Some(name_node) =
find_child_by_kind(&node, "identifier").or_else(|| find_child_by_kind(&node, "module_name"))
&& let Ok(text) = name_node.utf8_text(content.as_bytes())
{
definitions.push(text.to_string());
}
}
"const_item" | "static_item" => {
if let Some(name_node) = find_child_by_kind(&node, "identifier")
&& let Ok(text) = name_node.utf8_text(content.as_bytes())
{
definitions.push(text.to_string());
}
}
"def" | "defn" | "defp" | "defmodule" | "defprotocol" | "defimpl" => {
if let Some(name_node) = find_child_by_kind(&node, "identifier")
&& let Ok(text) = name_node.utf8_text(content.as_bytes())
{
definitions.push(text.to_string());
}
}
"object_definition" | "object_declaration" => {
if let Some(name_node) =
find_child_by_kind(&node, "identifier").or_else(|| find_child_by_kind(&node, "type_identifier"))
&& let Ok(text) = name_node.utf8_text(content.as_bytes())
{
definitions.push(text.to_string());
}
}
"namespace_declaration" | "namespace_definition" => {
if let Some(name_node) = find_child_by_kind(&node, "identifier")
&& let Ok(text) = name_node.utf8_text(content.as_bytes())
{
definitions.push(text.to_string());
}
}
"call_expression" | "call" => {
if let Some(func_node) = node.child(0)
&& let Ok(text) = func_node.utf8_text(content.as_bytes())
{
references.push(text.to_string());
}
}
_ => {}
}
if cursor.goto_first_child() {
loop {
extract_symbols_recursive(cursor, content, start_byte, end_byte, definitions, references);
if !cursor.goto_next_sibling() {
break;
}
}
cursor.goto_parent();
}
}
/// Reports whether `node` sits directly inside a construct that introduces a
/// binding, judged solely by the kind of its immediate parent.
///
/// A node without a parent is never in a definition context.
fn is_definition_context(node: &Node) -> bool {
    node.parent().is_some_and(|parent| {
        matches!(
            parent.kind(),
            "variable_declarator"
                | "parameter"
                | "formal_parameters"
                | "pattern"
                | "shorthand_property_identifier_pattern"
        )
    })
}