use std::time::{Instant, SystemTime, UNIX_EPOCH};
use tree_sitter::{Node as TsNode, Parser, Tree};
use crate::extraction::complexity::{count_complexity, RUST_COMPLEXITY};
use crate::types::{
generate_node_id, Edge, EdgeKind, ExtractionResult, Node, NodeKind, UnresolvedRef, Visibility,
};
/// Stateless entry point for extracting graph data from Rust source text.
///
/// The type carries no fields; its functionality is exposed through
/// associated functions such as `RustExtractor::extract`.
pub struct RustExtractor;
/// Mutable accumulator threaded through every visitor while one file is
/// being walked.
struct ExtractionState {
/// Graph nodes emitted so far, in visit order.
nodes: Vec<Node>,
/// Edges emitted so far (the visitors in this file push `Contains` edges).
edges: Vec<Edge>,
/// References recorded by name only; presumably resolved by a later
/// pass outside this file — TODO confirm.
unresolved_refs: Vec<UnresolvedRef>,
/// Human-readable error messages (e.g. a parse failure).
errors: Vec<String>,
/// Stack of enclosing scopes as `(name, node id)` pairs; the last entry is
/// the parent of whatever is currently being visited.
node_stack: Vec<(String, String)>,
/// Path of the file being extracted, copied into every emitted record.
file_path: String,
/// Raw bytes of the source text, used to slice out node text.
source: Vec<u8>,
/// Seconds since the UNIX epoch captured when the state was created;
/// stamped into each node's `updated_at`.
timestamp: u64,
}
impl ExtractionState {
    /// Creates an empty state for `file_path`, keeping a private copy of the
    /// source bytes and the current wall-clock time in whole seconds.
    fn new(file_path: &str, source: &str) -> Self {
        // A clock set before the epoch falls back to a zero duration.
        let since_epoch = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or_default();
        Self {
            nodes: Vec::new(),
            edges: Vec::new(),
            unresolved_refs: Vec::new(),
            errors: Vec::new(),
            node_stack: Vec::new(),
            file_path: file_path.to_owned(),
            source: source.as_bytes().to_vec(),
            timestamp: since_epoch.as_secs(),
        }
    }

    /// Joins the names of all enclosing scopes with `::`.
    fn qualified_prefix(&self) -> String {
        let scope_names: Vec<&str> = self
            .node_stack
            .iter()
            .map(|entry| entry.0.as_str())
            .collect();
        scope_names.join("::")
    }

    /// Returns the node id of the innermost enclosing scope, if any.
    fn parent_node_id(&self) -> Option<&str> {
        let (_, innermost_id) = self.node_stack.last()?;
        Some(innermost_id.as_str())
    }

    /// Returns the source text covered by `node`, or a placeholder when the
    /// byte range is not valid UTF-8.
    fn node_text(&self, node: TsNode<'_>) -> String {
        match node.utf8_text(&self.source) {
            Ok(text) => text.to_string(),
            Err(_) => "<invalid utf8>".to_string(),
        }
    }
}
impl RustExtractor {
/// Parses `source` and walks the syntax tree, returning everything that was
/// collected for `file_path`.
///
/// If parsing fails, the error message is recorded and the result is built
/// without a file node. Otherwise a `File` node is emitted first and
/// serves as the parent scope while the root's children are visited.
pub fn extract(file_path: &str, source: &str) -> ExtractionResult {
    let start = Instant::now();
    let mut state = ExtractionState::new(file_path, source);

    let tree = match Self::parse_source(source) {
        Err(msg) => {
            state.errors.push(msg);
            return Self::build_result(state, start);
        }
        Ok(parsed) => parsed,
    };

    let file_node_id = generate_node_id(file_path, &NodeKind::File, file_path, 0);
    // Zero-based index of the last line; an empty source stays at 0.
    let last_line = source.lines().count().saturating_sub(1) as u32;
    state.nodes.push(Node {
        id: file_node_id.clone(),
        kind: NodeKind::File,
        name: file_path.to_string(),
        qualified_name: file_path.to_string(),
        file_path: file_path.to_string(),
        start_line: 0,
        attrs_start_line: 0,
        end_line: last_line,
        start_column: 0,
        end_column: 0,
        signature: None,
        docstring: None,
        visibility: Visibility::Pub,
        is_async: false,
        branches: 0,
        loops: 0,
        returns: 0,
        max_nesting: 0,
        unsafe_blocks: 0,
        unchecked_calls: 0,
        assertions: 0,
        updated_at: state.timestamp,
        parent_id: None,
    });

    // The file node is the outermost scope for everything found below.
    state.node_stack.push((file_path.to_string(), file_node_id));
    Self::visit_children(&mut state, tree.root_node());
    state.node_stack.pop();

    Self::build_result(state, start)
}
/// Parses `source` with the Rust grammar.
///
/// Returns an error message when the grammar cannot be loaded into the
/// parser or when the parser produces no tree.
fn parse_source(source: &str) -> Result<Tree, String> {
    let mut parser = Parser::new();
    let language = crate::extraction::ts_provider::language("rust");
    if let Err(e) = parser.set_language(&language) {
        return Err(format!("failed to load Rust grammar: {e}"));
    }
    match parser.parse(source, None) {
        Some(tree) => Ok(tree),
        None => Err("tree-sitter parse returned None".to_string()),
    }
}
/// Dispatches every direct child of `node`, in source order, to
/// `visit_node`.
fn visit_children(state: &mut ExtractionState, node: TsNode<'_>) {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        Self::visit_node(state, child);
    }
}
/// Routes `node` to the visitor registered for its syntax kind; kinds
/// without a dedicated visitor are descended into.
fn visit_node(state: &mut ExtractionState, node: TsNode<'_>) {
    let handler: fn(&mut ExtractionState, TsNode<'_>) = match node.kind() {
        "function_item" | "function_signature_item" => Self::visit_function,
        "struct_item" => Self::visit_struct,
        "enum_item" => Self::visit_enum,
        "trait_item" => Self::visit_trait,
        "impl_item" => Self::visit_impl,
        "use_declaration" => Self::visit_use,
        "const_item" => Self::visit_const,
        "static_item" => Self::visit_static,
        "type_item" => Self::visit_type_alias,
        "mod_item" => Self::visit_module,
        "macro_invocation" => Self::visit_macro_invocation,
        // Anything else is only a container as far as extraction goes.
        _ => Self::visit_children,
    };
    handler(state, node);
}
/// Emits a node for a function or method, links it to its parent scope and
/// records call sites, typed method calls, annotations and the types used
/// in its parameters and return position.
///
/// The item is classified as a `Method` when any enclosing scope id starts
/// with `impl:` or `trait:`, otherwise as a `Function`.
fn visit_function(state: &mut ExtractionState, node: TsNode<'_>) {
    let name = Self::extract_name(state, node).unwrap_or_else(|| String::from("<anonymous>"));

    let in_method_scope = state
        .node_stack
        .iter()
        .any(|(_, scope_id)| scope_id.starts_with("impl:") || scope_id.starts_with("trait:"));
    let kind = if in_method_scope {
        NodeKind::Method
    } else {
        NodeKind::Function
    };

    let (begin, finish) = (node.start_position(), node.end_position());
    let start_line = begin.row as u32;
    let qualified_name = format!("{}::{}", state.qualified_prefix(), name);
    let id = generate_node_id(&state.file_path, &kind, &name, start_line);
    let metrics = count_complexity(node, &RUST_COMPLEXITY, &state.source);

    let record = Node {
        id: id.clone(),
        kind,
        name,
        qualified_name,
        file_path: state.file_path.clone(),
        start_line,
        attrs_start_line: Self::compute_attrs_start_line(node),
        end_line: finish.row as u32,
        start_column: begin.column as u32,
        end_column: finish.column as u32,
        signature: Some(Self::extract_function_signature(state, node)),
        docstring: Self::extract_docstring(state, node),
        visibility: Self::extract_visibility(node, state),
        is_async: Self::detect_async(state, node),
        branches: metrics.branches,
        loops: metrics.loops,
        returns: metrics.returns,
        max_nesting: metrics.max_nesting,
        unsafe_blocks: metrics.unsafe_blocks,
        unchecked_calls: metrics.unchecked_calls,
        assertions: metrics.assertions,
        updated_at: state.timestamp,
        parent_id: None,
    };
    state.nodes.push(record);

    let parent = state.parent_node_id().map(str::to_string);
    if let Some(source) = parent {
        state.edges.push(Edge {
            source,
            target: id.clone(),
            kind: EdgeKind::Contains,
            line: Some(start_line),
        });
    }

    Self::extract_call_sites(state, node, &id);

    // Variable -> type map used to qualify `receiver.method()` calls.
    let self_type = Self::enclosing_impl_type(state);
    let var_types = Self::collect_var_types(state, node, self_type.as_deref());
    Self::extract_typed_method_calls(state, node, &id, &var_types);

    Self::extract_annotations_from_modifiers(state, node, &id);

    if let Some(params) = node.child_by_field_name("parameters") {
        let mut cursor = params.walk();
        for param in params.children(&mut cursor) {
            if let Some(ty) = param.child_by_field_name("type") {
                Self::emit_type_refs(state, ty, &id, EdgeKind::TypeOf);
            }
        }
    }

    if let Some(ret) = node.child_by_field_name("return_type") {
        Self::emit_type_refs(state, ret, &id, EdgeKind::Returns);
    }
}
/// Emits a node for a struct, links it to its parent scope, records its
/// derives and annotations, then extracts its fields with the struct as
/// the enclosing scope.
fn visit_struct(state: &mut ExtractionState, node: TsNode<'_>) {
    let name = Self::extract_name(state, node).unwrap_or_else(|| String::from("<anonymous>"));
    let (begin, finish) = (node.start_position(), node.end_position());
    let start_line = begin.row as u32;
    let qualified_name = format!("{}::{}", state.qualified_prefix(), name);
    let id = generate_node_id(&state.file_path, &NodeKind::Struct, &name, start_line);

    let record = Node {
        id: id.clone(),
        kind: NodeKind::Struct,
        name: name.clone(),
        qualified_name,
        file_path: state.file_path.clone(),
        start_line,
        attrs_start_line: Self::compute_attrs_start_line(node),
        end_line: finish.row as u32,
        start_column: begin.column as u32,
        end_column: finish.column as u32,
        signature: Some(Self::extract_struct_signature(state, node)),
        docstring: Self::extract_docstring(state, node),
        visibility: Self::extract_visibility(node, state),
        is_async: false,
        branches: 0,
        loops: 0,
        returns: 0,
        max_nesting: 0,
        unsafe_blocks: 0,
        unchecked_calls: 0,
        assertions: 0,
        updated_at: state.timestamp,
        parent_id: None,
    };
    state.nodes.push(record);

    let parent = state.parent_node_id().map(str::to_string);
    if let Some(source) = parent {
        state.edges.push(Edge {
            source,
            target: id.clone(),
            kind: EdgeKind::Contains,
            line: Some(start_line),
        });
    }

    Self::extract_derive_macros(state, node, &id);
    Self::extract_annotations_from_modifiers(state, node, &id);

    // Fields are emitted with the struct as their parent scope.
    state.node_stack.push((name, id));
    Self::extract_fields(state, node);
    state.node_stack.pop();
}
/// Emits a node for an enum (signature = first line of its text), links it
/// to its parent scope, records derives and annotations, then extracts its
/// variants with the enum as the enclosing scope.
fn visit_enum(state: &mut ExtractionState, node: TsNode<'_>) {
    let name = Self::extract_name(state, node).unwrap_or_else(|| String::from("<anonymous>"));
    let (begin, finish) = (node.start_position(), node.end_position());
    let start_line = begin.row as u32;
    let qualified_name = format!("{}::{}", state.qualified_prefix(), name);
    let id = generate_node_id(&state.file_path, &NodeKind::Enum, &name, start_line);

    let full_text = state.node_text(node);
    let first_line = full_text.lines().next().unwrap_or("");

    let record = Node {
        id: id.clone(),
        kind: NodeKind::Enum,
        name: name.clone(),
        qualified_name,
        file_path: state.file_path.clone(),
        start_line,
        attrs_start_line: Self::compute_attrs_start_line(node),
        end_line: finish.row as u32,
        start_column: begin.column as u32,
        end_column: finish.column as u32,
        signature: Some(first_line.to_string()),
        docstring: Self::extract_docstring(state, node),
        visibility: Self::extract_visibility(node, state),
        is_async: false,
        branches: 0,
        loops: 0,
        returns: 0,
        max_nesting: 0,
        unsafe_blocks: 0,
        unchecked_calls: 0,
        assertions: 0,
        updated_at: state.timestamp,
        parent_id: None,
    };
    state.nodes.push(record);

    let parent = state.parent_node_id().map(str::to_string);
    if let Some(source) = parent {
        state.edges.push(Edge {
            source,
            target: id.clone(),
            kind: EdgeKind::Contains,
            line: Some(start_line),
        });
    }

    Self::extract_derive_macros(state, node, &id);
    Self::extract_annotations_from_modifiers(state, node, &id);

    // Variants are emitted with the enum as their parent scope.
    state.node_stack.push((name, id));
    Self::extract_enum_variants(state, node);
    state.node_stack.pop();
}
/// Emits a node for a trait, links it to its parent scope, records an
/// `Extends` reference for each nameable supertrait bound, and visits the
/// trait body with the trait as the enclosing scope.
fn visit_trait(state: &mut ExtractionState, node: TsNode<'_>) {
    let name = Self::extract_name(state, node).unwrap_or_else(|| String::from("<anonymous>"));
    let (begin, finish) = (node.start_position(), node.end_position());
    let start_line = begin.row as u32;
    let qualified_name = format!("{}::{}", state.qualified_prefix(), name);
    let id = generate_node_id(&state.file_path, &NodeKind::Trait, &name, start_line);

    let record = Node {
        id: id.clone(),
        kind: NodeKind::Trait,
        name: name.clone(),
        qualified_name,
        file_path: state.file_path.clone(),
        start_line,
        attrs_start_line: Self::compute_attrs_start_line(node),
        end_line: finish.row as u32,
        start_column: begin.column as u32,
        end_column: finish.column as u32,
        signature: Some(format!("trait {name}")),
        docstring: Self::extract_docstring(state, node),
        visibility: Self::extract_visibility(node, state),
        is_async: false,
        branches: 0,
        loops: 0,
        returns: 0,
        max_nesting: 0,
        unsafe_blocks: 0,
        unchecked_calls: 0,
        assertions: 0,
        updated_at: state.timestamp,
        parent_id: None,
    };
    state.nodes.push(record);

    let parent = state.parent_node_id().map(str::to_string);
    if let Some(source) = parent {
        state.edges.push(Edge {
            source,
            target: id.clone(),
            kind: EdgeKind::Contains,
            line: Some(start_line),
        });
    }

    Self::extract_annotations_from_modifiers(state, node, &id);

    if let Some(bounds) = node.child_by_field_name("bounds") {
        let mut cursor = bounds.walk();
        for bound in bounds.children(&mut cursor) {
            // Punctuation between bounds carries no name.
            if matches!(bound.kind(), "," | ":" | "+") {
                continue;
            }
            let bound_name = match Self::extract_trait_bound_name(state, bound) {
                Some(found) => found,
                None => continue,
            };
            let at = bound.start_position();
            state.unresolved_refs.push(UnresolvedRef {
                from_node_id: id.clone(),
                reference_name: bound_name,
                reference_kind: EdgeKind::Extends,
                line: at.row as u32,
                column: at.column as u32,
                file_path: state.file_path.clone(),
            });
        }
    }

    state.node_stack.push((name, id));
    if let Some(body) = node.child_by_field_name("body") {
        Self::visit_children(state, body);
    }
    state.node_stack.pop();
}
/// Returns the trait name referenced by a bound node, if one can be read.
///
/// A plain `type_identifier` is returned as-is; for scoped or generic types
/// the last direct `type_identifier` child is used; a higher-ranked bound
/// is resolved through its `type` field. Other kinds yield `None`.
fn extract_trait_bound_name(state: &ExtractionState, bound: TsNode<'_>) -> Option<String> {
    let kind = bound.kind();
    if kind == "type_identifier" {
        return Some(state.node_text(bound));
    }
    if kind == "scoped_type_identifier" || kind == "generic_type" {
        let mut cursor = bound.walk();
        let last_ident = bound
            .children(&mut cursor)
            .filter(|part| part.kind() == "type_identifier")
            .last();
        return last_ident.map(|part| state.node_text(part));
    }
    if kind == "higher_ranked_trait_bound" {
        let inner = bound.child_by_field_name("type")?;
        return Self::extract_trait_bound_name(state, inner);
    }
    None
}
fn visit_impl(state: &mut ExtractionState, node: TsNode<'_>) {
let type_name =
Self::extract_impl_type_name(state, node).unwrap_or_else(|| "<unknown>".to_string());
let trait_name = Self::extract_impl_trait_name(state, node);
let start_line = node.start_position().row as u32;
let end_line = node.end_position().row as u32;
let start_column = node.start_position().column as u32;
let end_column = node.end_position().column as u32;
let qualified_name = format!("{}::{}", state.qualified_prefix(), type_name);
let id = generate_node_id(&state.file_path, &NodeKind::Impl, &type_name, start_line);
let signature = if let Some(ref trait_n) = trait_name {
Some(format!("impl {trait_n} for {type_name}"))
} else {
Some(format!("impl {type_name}"))
};
let graph_node = Node {
id: id.clone(),
kind: NodeKind::Impl,
name: type_name.clone(),
qualified_name,
file_path: state.file_path.clone(),
start_line,
attrs_start_line: Self::compute_attrs_start_line(node),
end_line,
start_column,
end_column,
signature,
docstring: None,
visibility: Visibility::Private,
is_async: false,
branches: 0,
loops: 0,
returns: 0,
max_nesting: 0,
unsafe_blocks: 0,
unchecked_calls: 0,
assertions: 0,
updated_at: state.timestamp,
parent_id: None,
};
state.nodes.push(graph_node);
if let Some(parent_id) = state.parent_node_id() {
state.edges.push(Edge {
source: parent_id.to_string(),
target: id.clone(),
kind: EdgeKind::Contains,
line: Some(start_line),
});
}
if let Some(ref trait_n) = trait_name {
state.unresolved_refs.push(UnresolvedRef {
from_node_id: id.clone(),
reference_name: trait_n.clone(),
reference_kind: EdgeKind::Implements,
line: start_line,
column: start_column,
file_path: state.file_path.clone(),
});
}
Self::extract_annotations_from_modifiers(state, node, &id);
state.node_stack.push((type_name, id));
if let Some(body) = node.child_by_field_name("body") {
Self::visit_children(state, body);
}
state.node_stack.pop();
}
/// Emits a node for a `use` declaration, links it to its parent scope and
/// records a `Uses` reference to the imported path.
///
/// The imported path is read from the declaration's `argument` field so
/// that a leading visibility modifier (`pub use a::b;`) does not end up in
/// the node name or reference name. If the field is missing, the path is
/// recovered from the text by dropping a leading `use ` and the trailing
/// `;`. The signature always keeps the full declaration text.
fn visit_use(state: &mut ExtractionState, node: TsNode<'_>) {
    let text = state.node_text(node);
    let path = match node.child_by_field_name("argument") {
        Some(argument) => state.node_text(argument).trim().to_string(),
        None => {
            // Text-based fallback for trees that do not expose the field.
            let trimmed = text.trim();
            trimmed
                .strip_prefix("use ")
                .unwrap_or(trimmed)
                .trim_end_matches(';')
                .trim()
                .to_string()
        }
    };
    let visibility = Self::extract_visibility(node, state);
    let start_line = node.start_position().row as u32;
    let end_line = node.end_position().row as u32;
    let start_column = node.start_position().column as u32;
    let end_column = node.end_position().column as u32;
    let qualified_name = format!("{}::{}", state.qualified_prefix(), path);
    let id = generate_node_id(&state.file_path, &NodeKind::Use, &path, start_line);
    let graph_node = Node {
        id: id.clone(),
        kind: NodeKind::Use,
        name: path.clone(),
        qualified_name,
        file_path: state.file_path.clone(),
        start_line,
        attrs_start_line: Self::compute_attrs_start_line(node),
        end_line,
        start_column,
        end_column,
        signature: Some(text.trim().to_string()),
        docstring: None,
        visibility,
        is_async: false,
        branches: 0,
        loops: 0,
        returns: 0,
        max_nesting: 0,
        unsafe_blocks: 0,
        unchecked_calls: 0,
        assertions: 0,
        updated_at: state.timestamp,
        parent_id: None,
    };
    state.nodes.push(graph_node);
    if let Some(parent_id) = state.parent_node_id() {
        state.edges.push(Edge {
            source: parent_id.to_string(),
            target: id.clone(),
            kind: EdgeKind::Contains,
            line: Some(start_line),
        });
    }
    state.unresolved_refs.push(UnresolvedRef {
        from_node_id: id,
        reference_name: path,
        reference_kind: EdgeKind::Uses,
        line: start_line,
        column: start_column,
        file_path: state.file_path.clone(),
    });
}
/// Emits a node for a `const` item (signature = first line of its text)
/// and links it to its parent scope.
fn visit_const(state: &mut ExtractionState, node: TsNode<'_>) {
    let name = Self::extract_name(state, node).unwrap_or_else(|| String::from("<anonymous>"));
    let (begin, finish) = (node.start_position(), node.end_position());
    let start_line = begin.row as u32;
    let qualified_name = format!("{}::{}", state.qualified_prefix(), name);
    let id = generate_node_id(&state.file_path, &NodeKind::Const, &name, start_line);

    let full_text = state.node_text(node);
    let first_line = full_text.lines().next().unwrap_or("");

    let record = Node {
        id: id.clone(),
        kind: NodeKind::Const,
        name,
        qualified_name,
        file_path: state.file_path.clone(),
        start_line,
        attrs_start_line: Self::compute_attrs_start_line(node),
        end_line: finish.row as u32,
        start_column: begin.column as u32,
        end_column: finish.column as u32,
        signature: Some(first_line.to_string()),
        docstring: Self::extract_docstring(state, node),
        visibility: Self::extract_visibility(node, state),
        is_async: false,
        branches: 0,
        loops: 0,
        returns: 0,
        max_nesting: 0,
        unsafe_blocks: 0,
        unchecked_calls: 0,
        assertions: 0,
        updated_at: state.timestamp,
        parent_id: None,
    };
    state.nodes.push(record);

    let parent = state.parent_node_id().map(str::to_string);
    if let Some(source) = parent {
        state.edges.push(Edge {
            source,
            target: id,
            kind: EdgeKind::Contains,
            line: Some(start_line),
        });
    }
}
/// Emits a node for a `static` item (signature = first line of its text)
/// and links it to its parent scope.
fn visit_static(state: &mut ExtractionState, node: TsNode<'_>) {
    let name = Self::extract_name(state, node).unwrap_or_else(|| String::from("<anonymous>"));
    let (begin, finish) = (node.start_position(), node.end_position());
    let start_line = begin.row as u32;
    let qualified_name = format!("{}::{}", state.qualified_prefix(), name);
    let id = generate_node_id(&state.file_path, &NodeKind::Static, &name, start_line);

    let full_text = state.node_text(node);
    let first_line = full_text.lines().next().unwrap_or("");

    let record = Node {
        id: id.clone(),
        kind: NodeKind::Static,
        name,
        qualified_name,
        file_path: state.file_path.clone(),
        start_line,
        attrs_start_line: Self::compute_attrs_start_line(node),
        end_line: finish.row as u32,
        start_column: begin.column as u32,
        end_column: finish.column as u32,
        signature: Some(first_line.to_string()),
        docstring: Self::extract_docstring(state, node),
        visibility: Self::extract_visibility(node, state),
        is_async: false,
        branches: 0,
        loops: 0,
        returns: 0,
        max_nesting: 0,
        unsafe_blocks: 0,
        unchecked_calls: 0,
        assertions: 0,
        updated_at: state.timestamp,
        parent_id: None,
    };
    state.nodes.push(record);

    let parent = state.parent_node_id().map(str::to_string);
    if let Some(source) = parent {
        state.edges.push(Edge {
            source,
            target: id,
            kind: EdgeKind::Contains,
            line: Some(start_line),
        });
    }
}
/// Emits a node for a type alias (signature = its full trimmed text) and
/// links it to its parent scope.
fn visit_type_alias(state: &mut ExtractionState, node: TsNode<'_>) {
    let name = Self::extract_name(state, node).unwrap_or_else(|| String::from("<anonymous>"));
    let (begin, finish) = (node.start_position(), node.end_position());
    let start_line = begin.row as u32;
    let qualified_name = format!("{}::{}", state.qualified_prefix(), name);
    let id = generate_node_id(&state.file_path, &NodeKind::TypeAlias, &name, start_line);

    let full_text = state.node_text(node);

    let record = Node {
        id: id.clone(),
        kind: NodeKind::TypeAlias,
        name,
        qualified_name,
        file_path: state.file_path.clone(),
        start_line,
        attrs_start_line: Self::compute_attrs_start_line(node),
        end_line: finish.row as u32,
        start_column: begin.column as u32,
        end_column: finish.column as u32,
        signature: Some(full_text.trim().to_string()),
        docstring: Self::extract_docstring(state, node),
        visibility: Self::extract_visibility(node, state),
        is_async: false,
        branches: 0,
        loops: 0,
        returns: 0,
        max_nesting: 0,
        unsafe_blocks: 0,
        unchecked_calls: 0,
        assertions: 0,
        updated_at: state.timestamp,
        parent_id: None,
    };
    state.nodes.push(record);

    let parent = state.parent_node_id().map(str::to_string);
    if let Some(source) = parent {
        state.edges.push(Edge {
            source,
            target: id,
            kind: EdgeKind::Contains,
            line: Some(start_line),
        });
    }
}
/// Emits a node for a module, links it to its parent scope, records its
/// annotations, and visits an inline body (if present) with the module as
/// the enclosing scope.
fn visit_module(state: &mut ExtractionState, node: TsNode<'_>) {
    let name = Self::extract_name(state, node).unwrap_or_else(|| String::from("<anonymous>"));
    let (begin, finish) = (node.start_position(), node.end_position());
    let start_line = begin.row as u32;
    let qualified_name = format!("{}::{}", state.qualified_prefix(), name);
    let id = generate_node_id(&state.file_path, &NodeKind::Module, &name, start_line);

    let record = Node {
        id: id.clone(),
        kind: NodeKind::Module,
        name: name.clone(),
        qualified_name,
        file_path: state.file_path.clone(),
        start_line,
        attrs_start_line: Self::compute_attrs_start_line(node),
        end_line: finish.row as u32,
        start_column: begin.column as u32,
        end_column: finish.column as u32,
        signature: Some(format!("mod {name}")),
        docstring: Self::extract_docstring(state, node),
        visibility: Self::extract_visibility(node, state),
        is_async: false,
        branches: 0,
        loops: 0,
        returns: 0,
        max_nesting: 0,
        unsafe_blocks: 0,
        unchecked_calls: 0,
        assertions: 0,
        updated_at: state.timestamp,
        parent_id: None,
    };
    state.nodes.push(record);

    let parent = state.parent_node_id().map(str::to_string);
    if let Some(source) = parent {
        state.edges.push(Edge {
            source,
            target: id.clone(),
            kind: EdgeKind::Contains,
            line: Some(start_line),
        });
    }

    Self::extract_annotations_from_modifiers(state, node, &id);

    state.node_stack.push((name, id));
    if let Some(body) = node.child_by_field_name("body") {
        Self::visit_children(state, body);
    }
    state.node_stack.pop();
}
/// Records a `Calls` reference from the current parent scope to the macro
/// being invoked. Nothing is recorded when there is no parent scope.
///
/// The macro name comes from the `macro` field when present; otherwise it
/// is the trimmed text preceding the first `!`.
fn visit_macro_invocation(state: &mut ExtractionState, node: TsNode<'_>) {
    let macro_name = match node.child_by_field_name("macro") {
        Some(name_node) => state.node_text(name_node),
        None => {
            let invocation = state.node_text(node);
            invocation
                .split('!')
                .next()
                .unwrap_or("")
                .trim()
                .to_string()
        }
    };

    let at = node.start_position();
    let parent = state.parent_node_id().map(str::to_string);
    if let Some(from_node_id) = parent {
        state.unresolved_refs.push(UnresolvedRef {
            from_node_id,
            reference_name: macro_name,
            reference_kind: EdgeKind::Calls,
            line: at.row as u32,
            column: at.column as u32,
            file_path: state.file_path.clone(),
        });
    }
}
/// Returns the text of the node's `name` field, if it has one.
fn extract_name(state: &ExtractionState, node: TsNode<'_>) -> Option<String> {
    let name_node = node.child_by_field_name("name")?;
    Some(state.node_text(name_node))
}
/// Returns the text of the node's `type` field, if it has one.
fn extract_impl_type_name(state: &ExtractionState, node: TsNode<'_>) -> Option<String> {
    let type_node = node.child_by_field_name("type")?;
    Some(state.node_text(type_node))
}
/// Returns the text of the node's `trait` field, if it has one.
fn extract_impl_trait_name(state: &ExtractionState, node: TsNode<'_>) -> Option<String> {
    let trait_node = node.child_by_field_name("trait")?;
    Some(state.node_text(trait_node))
}
/// Classifies the visibility of an item from its first direct
/// `visibility_modifier` child.
///
/// Exactly `pub` is `Pub`; a modifier mentioning `crate` is `PubCrate`; one
/// mentioning `super` is `PubSuper`; any other modifier is treated as
/// `Pub`. Items without a modifier are `Private`.
fn extract_visibility(node: TsNode<'_>, state: &ExtractionState) -> Visibility {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        if child.kind() != "visibility_modifier" {
            continue;
        }
        let modifier = state.node_text(child);
        return if modifier == "pub" {
            Visibility::Pub
        } else if modifier.contains("crate") {
            Visibility::PubCrate
        } else if modifier.contains("super") {
            Visibility::PubSuper
        } else {
            Visibility::Pub
        };
    }
    Visibility::Private
}
/// Returns the function header: everything before the first `{`, or, for
/// body-less declarations, the text without trailing `;`. The result is
/// trimmed.
fn extract_function_signature(state: &ExtractionState, node: TsNode<'_>) -> String {
    let text = state.node_text(node);
    let header = match text.split_once('{') {
        Some((before_body, _)) => before_body,
        None => text.trim_end_matches(';'),
    };
    header.trim().to_string()
}
/// Returns the struct header: everything before the first `{`, or the first
/// line of the text when there is no brace. The result is trimmed.
fn extract_struct_signature(state: &ExtractionState, node: TsNode<'_>) -> String {
    let text = state.node_text(node);
    let header = match text.split_once('{') {
        Some((before_body, _)) => before_body,
        None => text.lines().next().unwrap_or(""),
    };
    header.trim().to_string()
}
/// Gathers the comments that directly precede `node` among its named
/// siblings and returns them, cleaned and joined with newlines, in source
/// order.
///
/// Attribute items between the comments and the node are skipped; any
/// other sibling kind ends the search. Returns `None` when no comment is
/// found or the cleaned text is empty.
fn extract_docstring(state: &ExtractionState, node: TsNode<'_>) -> Option<String> {
    // Collected nearest-first while walking backwards.
    let mut nearest_first: Vec<String> = Vec::new();
    let mut previous = node.prev_named_sibling();
    while let Some(sibling) = previous {
        let kind = sibling.kind();
        if kind == "line_comment" || kind == "block_comment" {
            nearest_first.push(state.node_text(sibling));
        } else if kind != "attribute_item" {
            break;
        }
        previous = sibling.prev_named_sibling();
    }

    if nearest_first.is_empty() {
        return None;
    }

    let joined = nearest_first
        .iter()
        .rev()
        .map(|raw| Self::clean_comment(raw))
        .collect::<Vec<String>>()
        .join("\n");
    let doc = joined.trim();
    if doc.is_empty() {
        None
    } else {
        Some(doc.to_string())
    }
}
/// Strips comment markers from a single comment and returns its text.
///
/// * Line comments (`///`, `//!`, `//`): the marker and at most one
///   following space are removed.
/// * Block comments (`/* ... */`): the delimiters are removed, each line is
///   trimmed and loses a leading `* ` or `*`, and the joined result is
///   trimmed.
/// * Anything else is returned trimmed but otherwise unchanged.
///
/// Degenerate input such as `/*/`, where the opening and closing delimiter
/// overlap, is not treated as a block comment; the delimiters are peeled
/// off one after the other so the slice bounds can never cross.
fn clean_comment(comment: &str) -> String {
    let trimmed = comment.trim();

    // Longest marker first: every line-comment marker starts with "//".
    let line_body = trimmed
        .strip_prefix("///")
        .or_else(|| trimmed.strip_prefix("//!"))
        .or_else(|| trimmed.strip_prefix("//"));
    if let Some(body) = line_body {
        return body.strip_prefix(' ').unwrap_or(body).to_string();
    }

    // The suffix is stripped from what remains after the prefix, so a
    // 3-byte "/*/" yields `None` here instead of an out-of-order slice.
    let block_body = trimmed
        .strip_prefix("/*")
        .and_then(|rest| rest.strip_suffix("*/"));
    match block_body {
        Some(inner) => inner
            .lines()
            .map(|line| {
                let l = line.trim();
                l.strip_prefix("* ")
                    .or_else(|| l.strip_prefix('*'))
                    .unwrap_or(l)
            })
            .collect::<Vec<_>>()
            .join("\n")
            .trim()
            .to_string(),
        None => trimmed.to_string(),
    }
}
/// Reports whether the function's header carries the `async` keyword.
///
/// The header is scanned token by token (split on whitespace) up to the
/// `fn` keyword, so the result does not depend on the exact visibility
/// spelling (`pub(in some::path)`, `pub(self)`, ...) or on how the
/// qualifiers are spaced or wrapped across lines.
fn detect_async(state: &ExtractionState, node: TsNode<'_>) -> bool {
    let text = state.node_text(node);
    text.split_whitespace()
        // Stop at `fn` so an `async` block inside the body is never counted.
        .take_while(|token| *token != "fn")
        .any(|token| token == "async")
}
/// Emits a node for every `field_declaration` that is a direct child of
/// the struct's `body`. Does nothing when the struct has no `body` field.
fn extract_fields(state: &mut ExtractionState, struct_node: TsNode<'_>) {
    let body = match struct_node.child_by_field_name("body") {
        Some(found) => found,
        None => return,
    };
    let mut cursor = body.walk();
    for member in body.children(&mut cursor) {
        if member.kind() == "field_declaration" {
            Self::extract_single_field(state, member);
        }
    }
}
/// Emits a node for one struct field, links it to the enclosing scope and
/// records `TypeOf` references for the field's type.
fn extract_single_field(state: &mut ExtractionState, node: TsNode<'_>) {
    let name = Self::extract_name(state, node).unwrap_or_else(|| String::from("<anonymous>"));
    let (begin, finish) = (node.start_position(), node.end_position());
    let start_line = begin.row as u32;
    let qualified_name = format!("{}::{}", state.qualified_prefix(), name);
    let id = generate_node_id(&state.file_path, &NodeKind::Field, &name, start_line);

    // Signature is the declaration text without a trailing comma.
    let declaration = state.node_text(node);
    let signature = declaration.trim().trim_end_matches(',').trim().to_string();

    let record = Node {
        id: id.clone(),
        kind: NodeKind::Field,
        name,
        qualified_name,
        file_path: state.file_path.clone(),
        start_line,
        attrs_start_line: Self::compute_attrs_start_line(node),
        end_line: finish.row as u32,
        start_column: begin.column as u32,
        end_column: finish.column as u32,
        signature: Some(signature),
        docstring: None,
        visibility: Self::extract_visibility(node, state),
        is_async: false,
        branches: 0,
        loops: 0,
        returns: 0,
        max_nesting: 0,
        unsafe_blocks: 0,
        unchecked_calls: 0,
        assertions: 0,
        updated_at: state.timestamp,
        parent_id: None,
    };
    state.nodes.push(record);

    let parent = state.parent_node_id().map(str::to_string);
    if let Some(source) = parent {
        state.edges.push(Edge {
            source,
            target: id.clone(),
            kind: EdgeKind::Contains,
            line: Some(start_line),
        });
    }

    if let Some(type_node) = node.child_by_field_name("type") {
        Self::emit_type_refs(state, type_node, &id, EdgeKind::TypeOf);
    }
}
/// Emits a node for every `enum_variant` that is a direct child of the
/// enum's `body`. Does nothing when the enum has no `body` field.
fn extract_enum_variants(state: &mut ExtractionState, enum_node: TsNode<'_>) {
    let body = match enum_node.child_by_field_name("body") {
        Some(found) => found,
        None => return,
    };
    let mut cursor = body.walk();
    for member in body.children(&mut cursor) {
        if member.kind() == "enum_variant" {
            Self::extract_single_variant(state, member);
        }
    }
}
/// Emits a node for one enum variant (always recorded as `Pub`) and links
/// it to the enclosing scope.
fn extract_single_variant(state: &mut ExtractionState, node: TsNode<'_>) {
    let name = Self::extract_name(state, node).unwrap_or_else(|| String::from("<anonymous>"));
    let (begin, finish) = (node.start_position(), node.end_position());
    let start_line = begin.row as u32;
    let qualified_name = format!("{}::{}", state.qualified_prefix(), name);
    let id = generate_node_id(&state.file_path, &NodeKind::EnumVariant, &name, start_line);

    // Signature is the variant text without a trailing comma.
    let declaration = state.node_text(node);
    let signature = declaration.trim().trim_end_matches(',').to_string();

    let record = Node {
        id: id.clone(),
        kind: NodeKind::EnumVariant,
        name,
        qualified_name,
        file_path: state.file_path.clone(),
        start_line,
        attrs_start_line: Self::compute_attrs_start_line(node),
        end_line: finish.row as u32,
        start_column: begin.column as u32,
        end_column: finish.column as u32,
        signature: Some(signature),
        docstring: None,
        visibility: Visibility::Pub,
        is_async: false,
        branches: 0,
        loops: 0,
        returns: 0,
        max_nesting: 0,
        unsafe_blocks: 0,
        unchecked_calls: 0,
        assertions: 0,
        updated_at: state.timestamp,
        parent_id: None,
    };
    state.nodes.push(record);

    let parent = state.parent_node_id().map(str::to_string);
    if let Some(source) = parent {
        state.edges.push(Edge {
            source,
            target: id,
            kind: EdgeKind::Contains,
            line: Some(start_line),
        });
    }
}
/// Names of associated functions that `infer_expr_type` treats as
/// constructors: a call `Path::<name>(..)` with one of these names is
/// assumed to produce a value of the type named by `Path`.
const CONSTRUCTOR_NAMES: &'static [&'static str] = &[
"new",
"default",
"with_capacity",
"from",
"try_from",
"build",
"create",
"open",
"init",
"builder",
"from_str",
];
/// Returns the name of the innermost enclosing scope whose node id starts
/// with `impl:`, if there is one.
fn enclosing_impl_type(state: &ExtractionState) -> Option<String> {
    for (scope_name, scope_id) in state.node_stack.iter().rev() {
        if scope_id.starts_with("impl:") {
            return Some(scope_name.clone());
        }
    }
    None
}
/// Reduces a type expression to the bare name of its outermost nominal
/// type, or `None` when no such name can be read.
///
/// Steps, in order:
/// 1. leading `&`, `mut ` and lifetimes (`'a `) are peeled off repeatedly;
/// 2. a leading `dyn ` is dropped, while `impl ...` types yield `None`;
/// 3. generic arguments (from the first `<`) are cut;
/// 4. additional bounds (from the first `+`, as in `dyn Trait + Send`) are
///    cut so only the principal name remains;
/// 5. the last `::` segment is taken and must start with a letter or `_`
///    (which rejects tuples, slices, raw pointers, ...).
fn normalize_type_name(raw: &str) -> Option<String> {
    let mut s = raw.trim();
    loop {
        if let Some(r) = s.strip_prefix('&') {
            s = r.trim_start();
        } else if let Some(r) = s.strip_prefix("mut ") {
            s = r.trim_start();
        } else if s.starts_with('\'') {
            // A lifetime with nothing after it names no type.
            match s.find(char::is_whitespace) {
                Some(pos) => s = s[pos..].trim_start(),
                None => return None,
            }
        } else {
            break;
        }
    }
    if let Some(r) = s.strip_prefix("dyn ") {
        s = r.trim_start();
    }
    if s.starts_with("impl ") {
        return None;
    }
    if let Some(pos) = s.find('<') {
        s = s[..pos].trim_end();
    }
    // Generics are already gone, so any remaining `+` introduces extra
    // bounds on a trait object; keep only the principal trait.
    if let Some(pos) = s.find('+') {
        s = s[..pos].trim_end();
    }
    let seg = s.rsplit("::").next().unwrap_or(s).trim();
    let first = seg.chars().next()?;
    if !first.is_alphabetic() && first != '_' {
        return None;
    }
    Some(seg.to_string())
}
/// Returns the variable name bound by a simple pattern: either a bare
/// identifier or a `mut` pattern whose first named child is an identifier.
/// Any other pattern yields `None`.
fn binding_ident(state: &ExtractionState, pat: TsNode<'_>) -> Option<String> {
    let ident = match pat.kind() {
        "identifier" => pat,
        "mut_pattern" => {
            let inner = pat.named_child(0)?;
            if inner.kind() != "identifier" {
                return None;
            }
            inner
        }
        _ => return None,
    };
    Some(state.node_text(ident))
}
/// Makes a best-effort guess at the type produced by an initializer
/// expression.
///
/// * `Path::ctor(..)` where `ctor` is listed in `CONSTRUCTOR_NAMES` yields
///   the normalized `Path` (with `Self` replaced by `self_type`);
/// * a struct literal yields its normalized name;
/// * reference and `?` expressions are looked through via their first
///   named child.
///
/// Everything else yields `None`.
fn infer_expr_type(
    state: &ExtractionState,
    value: TsNode<'_>,
    self_type: Option<&str>,
) -> Option<String> {
    let kind = value.kind();

    if kind == "call_expression" {
        let func = value.child_by_field_name("function")?;
        if func.kind() != "scoped_identifier" {
            return None;
        }
        let callee = state.node_text(func.child_by_field_name("name")?);
        let is_constructor = Self::CONSTRUCTOR_NAMES
            .iter()
            .any(|known| *known == callee.as_str());
        if !is_constructor {
            return None;
        }
        let path = func.child_by_field_name("path")?;
        let ty = Self::normalize_type_name(&state.node_text(path))?;
        return if ty == "Self" {
            self_type.map(str::to_string)
        } else {
            Some(ty)
        };
    }

    if kind == "struct_expression" {
        let name = value.child_by_field_name("name")?;
        return Self::normalize_type_name(&state.node_text(name));
    }

    if kind == "reference_expression" || kind == "try_expression" {
        let inner = value.named_child(0)?;
        return Self::infer_expr_type(state, inner, self_type);
    }

    None
}
/// Builds a variable-name -> type-name map for one function.
///
/// The map is seeded with `self` (when `self_type` is known), then filled
/// from parameters whose pattern is a plain identifier and whose type can
/// be normalized, and finally from `let` declarations in the body.
fn collect_var_types(
    state: &ExtractionState,
    fn_node: TsNode<'_>,
    self_type: Option<&str>,
) -> std::collections::HashMap<String, String> {
    let mut map = std::collections::HashMap::new();

    if let Some(receiver_type) = self_type {
        map.insert("self".to_string(), receiver_type.to_string());
    }

    if let Some(params) = fn_node.child_by_field_name("parameters") {
        let mut cursor = params.walk();
        for param in params.children(&mut cursor) {
            if param.kind() != "parameter" {
                continue;
            }
            let (pat, ty) = match (
                param.child_by_field_name("pattern"),
                param.child_by_field_name("type"),
            ) {
                (Some(pat), Some(ty)) => (pat, ty),
                _ => continue,
            };
            if pat.kind() != "identifier" {
                continue;
            }
            if let Some(type_name) = Self::normalize_type_name(&state.node_text(ty)) {
                map.insert(state.node_text(pat), type_name);
            }
        }
    }

    if let Some(body) = fn_node.child_by_field_name("body") {
        Self::collect_let_types(state, body, self_type, &mut map);
    }

    map
}
/// Recursively records the types of `let` bindings found under `node`.
///
/// For each `let` with a simple binding, an explicit type annotation wins;
/// otherwise the type is inferred from the initializer. Later bindings of
/// the same name overwrite earlier ones. Nested `function_item`s are not
/// descended into.
fn collect_let_types(
    state: &ExtractionState,
    node: TsNode<'_>,
    self_type: Option<&str>,
    map: &mut std::collections::HashMap<String, String>,
) {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        if child.kind() == "let_declaration" {
            let bound_name = child
                .child_by_field_name("pattern")
                .and_then(|pat| Self::binding_ident(state, pat));
            if let Some(var) = bound_name {
                let annotated = child
                    .child_by_field_name("type")
                    .and_then(|t| Self::normalize_type_name(&state.node_text(t)));
                let resolved = match annotated {
                    Some(type_name) => Some(type_name),
                    None => child
                        .child_by_field_name("value")
                        .and_then(|v| Self::infer_expr_type(state, v, self_type)),
                };
                if let Some(type_name) = resolved {
                    map.insert(var, type_name);
                }
            }
        }

        // Nested functions have their own scope.
        if child.kind() != "function_item" {
            Self::collect_let_types(state, child, self_type, map);
        }
    }
}
/// Emits `Type::method` call references for method calls on receivers whose
/// type is known.
///
/// A `call_expression` qualifies when its `function` field is a
/// `field_expression` whose `field` is a `field_identifier` and whose `value`
/// is either `self` or an `identifier` present in `var_types`. Each match
/// pushes one `EdgeKind::Calls` entry onto `state.unresolved_refs`, positioned
/// at the start of the call expression.
///
/// The walk recurses into every child except nested `function_item` nodes.
fn extract_typed_method_calls(
    state: &mut ExtractionState,
    node: TsNode<'_>,
    fn_node_id: &str,
    var_types: &std::collections::HashMap<String, String>,
) {
    let mut walker = node.walk();
    for child in node.children(&mut walker) {
        if child.kind() == "call_expression" {
            // Receiver and method-name nodes of a `recv.method(...)` call, if any.
            let receiver_and_method = child
                .child_by_field_name("function")
                .filter(|callee| callee.kind() == "field_expression")
                .and_then(|callee| {
                    match (
                        callee.child_by_field_name("value"),
                        callee.child_by_field_name("field"),
                    ) {
                        (Some(receiver), Some(field)) if field.kind() == "field_identifier" => {
                            Some((receiver, field))
                        }
                        _ => None,
                    }
                });

            if let Some((receiver, method_node)) = receiver_and_method {
                let receiver_type = match receiver.kind() {
                    "self" => var_types.get("self").cloned(),
                    "identifier" => var_types.get(&state.node_text(receiver)).cloned(),
                    _ => None,
                };

                if let Some(ty) = receiver_type {
                    let method = state.node_text(method_node);
                    let at = child.start_position();
                    state.unresolved_refs.push(UnresolvedRef {
                        from_node_id: fn_node_id.to_string(),
                        reference_name: format!("{ty}::{method}"),
                        reference_kind: EdgeKind::Calls,
                        line: at.row as u32,
                        column: at.column as u32,
                        file_path: state.file_path.clone(),
                    });
                }
            }
        }

        if child.kind() != "function_item" {
            Self::extract_typed_method_calls(state, child, fn_node_id, var_types);
        }
    }
}
/// Records call references made beneath `node` on behalf of `fn_node_id`.
///
/// * `call_expression`: pushes the full callee text; when that text contains a
///   `.`, additionally pushes the part after the last `.` (the bare method
///   name). The call's children are then walked as well.
/// * `macro_invocation`: pushes the macro name, taken from the `macro` field
///   or, failing that, from the text before the first `!`. Children are walked.
/// * `token_tree`: handed to `extract_calls_in_token_tree`.
/// * `function_item`: skipped entirely.
/// * anything else: walked recursively.
///
/// All references use `EdgeKind::Calls` and the start position of the node
/// that produced them.
fn extract_call_sites(state: &mut ExtractionState, node: TsNode<'_>, fn_node_id: &str) {
    let mut walker = node.walk();
    for child in node.children(&mut walker) {
        let at = child.start_position();
        match child.kind() {
            "call_expression" => {
                if let Some(callee) = child.child_by_field_name("function") {
                    let callee_text = state.node_text(callee);
                    // Captured up front because `callee_text` is moved into the first ref.
                    let bare_method = callee_text
                        .rsplit_once('.')
                        .map(|(_, method)| method.to_string());

                    state.unresolved_refs.push(UnresolvedRef {
                        from_node_id: fn_node_id.to_string(),
                        reference_name: callee_text,
                        reference_kind: EdgeKind::Calls,
                        line: at.row as u32,
                        column: at.column as u32,
                        file_path: state.file_path.clone(),
                    });

                    if let Some(method) = bare_method {
                        state.unresolved_refs.push(UnresolvedRef {
                            from_node_id: fn_node_id.to_string(),
                            reference_name: method,
                            reference_kind: EdgeKind::Calls,
                            line: at.row as u32,
                            column: at.column as u32,
                            file_path: state.file_path.clone(),
                        });
                    }
                }
                Self::extract_call_sites(state, child, fn_node_id);
            }
            "macro_invocation" => {
                let macro_name = match child.child_by_field_name("macro") {
                    Some(name_node) => state.node_text(name_node),
                    None => {
                        let raw = state.node_text(child);
                        raw.split('!').next().unwrap_or("").trim().to_string()
                    }
                };
                state.unresolved_refs.push(UnresolvedRef {
                    from_node_id: fn_node_id.to_string(),
                    reference_name: macro_name,
                    reference_kind: EdgeKind::Calls,
                    line: at.row as u32,
                    column: at.column as u32,
                    file_path: state.file_path.clone(),
                });
                Self::extract_call_sites(state, child, fn_node_id);
            }
            "token_tree" => Self::extract_calls_in_token_tree(state, child, fn_node_id),
            // Nested functions are not attributed to the enclosing function here.
            "function_item" => {}
            _ => Self::extract_call_sites(state, child, fn_node_id),
        }
    }
}
/// Heuristically finds calls inside a macro `token_tree`.
///
/// The direct children are scanned left to right:
/// * an `identifier` immediately followed by a `token_tree` is recorded as an
///   `EdgeKind::Calls` reference (positioned at the identifier), and the
///   following tree is scanned recursively;
/// * a `token_tree` not consumed that way is scanned recursively;
/// * a `macro_invocation` is handed to `extract_call_sites`;
/// * every other token is ignored.
fn extract_calls_in_token_tree(
    state: &mut ExtractionState,
    node: TsNode<'_>,
    fn_node_id: &str,
) {
    let mut walker = node.walk();
    let tokens: Vec<_> = node.children(&mut walker).collect();

    let mut pending = tokens.into_iter().peekable();
    while let Some(token) = pending.next() {
        match token.kind() {
            "identifier" => {
                // `next_if` consumes the following token only when it is the
                // argument tree, so a lone identifier leaves the stream untouched.
                if let Some(args) = pending.next_if(|next| next.kind() == "token_tree") {
                    let at = token.start_position();
                    let callee_name = state.node_text(token);
                    state.unresolved_refs.push(UnresolvedRef {
                        from_node_id: fn_node_id.to_string(),
                        reference_name: callee_name,
                        reference_kind: EdgeKind::Calls,
                        line: at.row as u32,
                        column: at.column as u32,
                        file_path: state.file_path.clone(),
                    });
                    Self::extract_calls_in_token_tree(state, args, fn_node_id);
                }
            }
            "token_tree" => Self::extract_calls_in_token_tree(state, token, fn_node_id),
            "macro_invocation" => Self::extract_call_sites(state, token, fn_node_id),
            _ => {}
        }
    }
}
/// Scans the attributes directly preceding `node` for `derive` lists.
///
/// Walks backwards over previous named siblings. Each `attribute_item` whose
/// text contains `derive` is passed to `parse_derive_list`; line and block
/// comments are stepped over; any other sibling ends the scan.
fn extract_derive_macros(state: &mut ExtractionState, node: TsNode<'_>, item_id: &str) {
    let mut previous = node.prev_named_sibling();
    while let Some(sibling) = previous {
        match sibling.kind() {
            "attribute_item" => {
                let attr_text = state.node_text(sibling);
                if attr_text.contains("derive") {
                    Self::parse_derive_list(state, &attr_text, item_id, sibling);
                }
            }
            "line_comment" | "block_comment" => {}
            _ => break,
        }
        previous = sibling.prev_named_sibling();
    }
}
/// Emits one `EdgeKind::DerivesMacro` reference per entry of the `derive(...)`
/// list contained in `attr_text`.
///
/// * `attr_text` - source text of the attribute (e.g. `#[derive(Debug, Clone)]`
///   or `#[cfg_attr(test, derive(Debug))]`).
/// * `item_id` - id of the item the attribute is attached to; used as the
///   reference source.
/// * `attr_node` - the attribute node; its start position is used for every
///   emitted reference.
///
/// The `derive(` marker is only honoured when it is not the tail of a longer
/// identifier, so attributes such as `#[proc_macro_derive(Foo, attributes(bar))]`
/// or `#[my_derive(...)]` no longer produce bogus derive references. Nothing is
/// emitted when no marker or no closing `)` is found.
fn parse_derive_list(
    state: &mut ExtractionState,
    attr_text: &str,
    item_id: &str,
    attr_node: TsNode<'_>,
) {
    const MARKER: &str = "derive(";

    // Byte offset just past the first stand-alone `derive(` occurrence.
    let list_start = attr_text.match_indices(MARKER).find_map(|(idx, _)| {
        let glued_to_identifier = attr_text[..idx]
            .chars()
            .next_back()
            .map_or(false, |c| c.is_alphanumeric() || c == '_');
        if glued_to_identifier {
            None
        } else {
            Some(idx + MARKER.len())
        }
    });

    let derive_args = match list_start {
        Some(begin) => &attr_text[begin..],
        None => return,
    };
    let inner = match derive_args.find(')') {
        Some(end) => &derive_args[..end],
        None => return,
    };

    let at = attr_node.start_position();
    for trait_name in inner.split(',').map(str::trim).filter(|t| !t.is_empty()) {
        state.unresolved_refs.push(UnresolvedRef {
            from_node_id: item_id.to_string(),
            reference_name: trait_name.to_string(),
            reference_kind: EdgeKind::DerivesMacro,
            line: at.row as u32,
            column: at.column as u32,
            file_path: state.file_path.clone(),
        });
    }
}
fn compute_attrs_start_line(node: TsNode<'_>) -> u32 {
let mut earliest = node.start_position().row as u32;
let mut current = node.prev_named_sibling();
while let Some(sibling) = current {
match sibling.kind() {
"attribute_item" | "line_comment" | "block_comment" => {
earliest = sibling.start_position().row as u32;
current = sibling.prev_named_sibling();
}
_ => break,
}
}
earliest
}
/// Emits a reference of the given `kind` from `from_id` for every type name
/// found in the subtree rooted at `type_node`.
///
/// This is the entry point: it creates the cursor and hands the traversal to
/// `emit_type_refs_walk`.
fn emit_type_refs(
    state: &mut ExtractionState,
    type_node: TsNode<'_>,
    from_id: &str,
    kind: EdgeKind,
) {
    Self::emit_type_refs_walk(state, &mut type_node.walk(), from_id, kind);
}
/// Pre-order traversal step behind `emit_type_refs`.
///
/// Pushes an unresolved reference for the cursor's current node when it is a
/// `type_identifier` or `primitive_type`, then visits every child. On return
/// the cursor is back on the node it started on.
fn emit_type_refs_walk(
    state: &mut ExtractionState,
    cursor: &mut tree_sitter::TreeCursor<'_>,
    from_id: &str,
    kind: EdgeKind,
) {
    let current = cursor.node();
    if matches!(current.kind(), "type_identifier" | "primitive_type") {
        let at = current.start_position();
        let reference_name = state.node_text(current);
        state.unresolved_refs.push(UnresolvedRef {
            from_node_id: from_id.to_string(),
            reference_name,
            reference_kind: kind,
            line: at.row as u32,
            column: at.column as u32,
            file_path: state.file_path.clone(),
        });
    }

    // Leaf node: nothing below, and the cursor never moved.
    if !cursor.goto_first_child() {
        return;
    }
    loop {
        Self::emit_type_refs_walk(state, cursor, from_id, kind);
        if !cursor.goto_next_sibling() {
            break;
        }
    }
    cursor.goto_parent();
}
/// Turns the non-derive attributes directly preceding `node` into annotation
/// nodes attached to `target_id`.
///
/// Walks backwards over previous named siblings. Each `attribute_item` whose
/// text does not contain `derive` is passed to `extract_annotations_from_node`;
/// line and block comments are stepped over; any other sibling ends the scan.
fn extract_annotations_from_modifiers(
    state: &mut ExtractionState,
    node: TsNode<'_>,
    target_id: &str,
) {
    let mut previous = node.prev_named_sibling();
    while let Some(sibling) = previous {
        match sibling.kind() {
            "attribute_item" => {
                let is_derive = state.node_text(sibling).contains("derive");
                if !is_derive {
                    Self::extract_annotations_from_node(state, sibling, target_id);
                }
            }
            "line_comment" | "block_comment" => {}
            _ => break,
        }
        previous = sibling.prev_named_sibling();
    }
}
/// Records one attribute as an `AnnotationUsage` graph node.
///
/// Three things are appended to `state`:
/// 1. a `Node` of kind `AnnotationUsage` spanning the attribute, with the
///    trimmed attribute text as its signature;
/// 2. an unresolved `Annotates` reference from that node to the annotation
///    name;
/// 3. an `Annotates` edge from that node to `target_id`.
fn extract_annotations_from_node(
    state: &mut ExtractionState,
    node: TsNode<'_>,
    target_id: &str,
) {
    let name = Self::extract_annotation_name(state, node);

    let begin = node.start_position();
    let finish = node.end_position();
    let start_line = begin.row as u32;
    let start_column = begin.column as u32;

    let annotation_id = generate_node_id(
        &state.file_path,
        &NodeKind::AnnotationUsage,
        &name,
        start_line,
    );

    let usage = Node {
        id: annotation_id.clone(),
        kind: NodeKind::AnnotationUsage,
        name: name.clone(),
        qualified_name: format!("{}::@{}", state.qualified_prefix(), name),
        file_path: state.file_path.clone(),
        start_line,
        attrs_start_line: Self::compute_attrs_start_line(node),
        end_line: finish.row as u32,
        start_column,
        end_column: finish.column as u32,
        signature: Some(state.node_text(node).trim().to_string()),
        docstring: None,
        visibility: Visibility::Private,
        is_async: false,
        branches: 0,
        loops: 0,
        returns: 0,
        max_nesting: 0,
        unsafe_blocks: 0,
        unchecked_calls: 0,
        assertions: 0,
        updated_at: state.timestamp,
        parent_id: None,
    };
    state.nodes.push(usage);

    state.unresolved_refs.push(UnresolvedRef {
        from_node_id: annotation_id.clone(),
        reference_name: name,
        reference_kind: EdgeKind::Annotates,
        line: start_line,
        column: start_column,
        file_path: state.file_path.clone(),
    });

    state.edges.push(Edge {
        source: annotation_id,
        target: target_id.to_string(),
        kind: EdgeKind::Annotates,
        line: Some(start_line),
    });
}
/// Returns the bare name (path) of an attribute node, without delimiters or
/// arguments.
///
/// Examples of the mapping from attribute text to returned name:
/// * `#[inline]` gives `inline`
/// * `#[cfg(test)]` gives `cfg`
/// * `#[path = "foo.rs"]` gives `path`
/// * `#![allow(dead_code)]` gives `allow`
///
/// The opening `#[` / `#![` and the closing `]` are stripped independently, so
/// text missing one of them still has the other removed. The name ends at the
/// first `(` (argument list) or `=` (name-value form), whichever comes first.
fn extract_annotation_name(state: &ExtractionState, node: TsNode<'_>) -> String {
    let text = state.node_text(node);
    let trimmed = text.trim();

    let without_open = trimmed
        .strip_prefix("#![")
        .or_else(|| trimmed.strip_prefix("#["))
        .unwrap_or(trimmed);
    // Fall back to the prefix-stripped text, not the raw text, when `]` is absent.
    let inner = without_open.strip_suffix(']').unwrap_or(without_open);

    inner
        .split(|c: char| c == '(' || c == '=')
        .next()
        .unwrap_or(inner)
        .trim()
        .to_string()
}
fn build_result(state: ExtractionState, start: Instant) -> ExtractionResult {
ExtractionResult {
nodes: state.nodes,
edges: state.edges,
unresolved_refs: state.unresolved_refs,
errors: state.errors,
duration_ms: start.elapsed().as_millis() as u64,
}
}
}
/// Exposes `RustExtractor` through the crate's `LanguageExtractor` trait.
impl crate::extraction::LanguageExtractor for RustExtractor {
/// File extensions handled by this extractor: only `rs`.
fn extensions(&self) -> &[&str] {
&["rs"]
}
/// Display name of the language handled by this extractor.
fn language_name(&self) -> &'static str {
"Rust"
}
/// Extracts graph data from `source`, attributing it to `file_path`.
///
/// This forwards to the inherent associated function `RustExtractor::extract`
/// (the two-argument version without `&self`); it is not a recursive call.
fn extract(&self, file_path: &str, source: &str) -> ExtractionResult {
RustExtractor::extract(file_path, source)
}
}