use std::time::{Instant, SystemTime, UNIX_EPOCH};
use tree_sitter::{Node as TsNode, Parser, Tree};
use crate::types::{
generate_node_id, Edge, EdgeKind, ExtractionResult, Node, NodeKind, Visibility,
};
pub struct LeanExtractor;
struct ExtractionState {
nodes: Vec<Node>,
edges: Vec<Edge>,
file_path: String,
source: Vec<u8>,
file_node_id: String,
timestamp: u64,
scope_stack: Vec<(String, String)>,
}
impl ExtractionState {
fn new(file_path: &str, source: &str) -> Self {
let timestamp = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_secs();
let file_node_id = generate_node_id(file_path, &NodeKind::File, file_path, 0);
Self {
nodes: Vec::new(),
edges: Vec::new(),
file_path: file_path.to_string(),
source: source.as_bytes().to_vec(),
file_node_id,
timestamp,
scope_stack: Vec::new(),
}
}
fn node_text(&self, node: TsNode<'_>) -> String {
node.utf8_text(&self.source)
.unwrap_or("<invalid utf8>")
.to_string()
}
}
impl LeanExtractor {
pub fn extract_lean(file_path: &str, source: &str) -> ExtractionResult {
let start = Instant::now();
let mut state = ExtractionState::new(file_path, source);
let file_node = Node {
id: state.file_node_id.clone(),
kind: NodeKind::File,
name: file_path.to_string(),
qualified_name: file_path.to_string(),
file_path: file_path.to_string(),
start_line: 0,
attrs_start_line: 0,
end_line: source.lines().count().saturating_sub(1) as u32,
start_column: 0,
end_column: 0,
signature: None,
docstring: None,
visibility: Visibility::Pub,
is_async: false,
branches: 0,
loops: 0,
returns: 0,
max_nesting: 0,
unsafe_blocks: 0,
unchecked_calls: 0,
assertions: 0,
updated_at: state.timestamp,
};
state.nodes.push(file_node);
state
.scope_stack
.push((file_path.to_string(), state.file_node_id.clone()));
if let Ok(tree) = Self::parse(source) {
Self::visit(&mut state, tree.root_node());
}
ExtractionResult {
nodes: state.nodes,
edges: state.edges,
unresolved_refs: Vec::new(),
errors: Vec::new(),
duration_ms: start.elapsed().as_millis() as u64,
}
}
fn parse(source: &str) -> Result<Tree, String> {
let mut parser = Parser::new();
let language = crate::extraction::ts_provider::language("lean");
parser
.set_language(&language)
.map_err(|e| format!("failed to load Lean grammar: {e}"))?;
parser
.parse(source, None)
.ok_or_else(|| "tree-sitter parse returned None".to_string())
}
fn visit(state: &mut ExtractionState, node: TsNode<'_>) {
if !node.is_named() {
return;
}
match node.kind() {
"module" | "declaration" => Self::visit_children(state, node),
"namespace" | "section" => Self::visit_namespace(state, node),
"import" => Self::visit_import(state, node),
"def" | "abbrev" | "theorem" => {
Self::emit_named(state, node, NodeKind::Function);
}
"instance" => Self::emit_if_named(state, node, NodeKind::Const),
"axiom" | "constant" => {
Self::emit_named(state, node, NodeKind::Const);
}
"structure" => {
Self::emit_named(state, node, NodeKind::Struct);
}
"inductive" | "class_inductive" => {
Self::emit_named(state, node, NodeKind::Enum);
}
_ => {}
}
}
fn visit_children(state: &mut ExtractionState, node: TsNode<'_>) {
let mut cursor = node.walk();
if cursor.goto_first_child() {
loop {
Self::visit(state, cursor.node());
if !cursor.goto_next_sibling() {
break;
}
}
}
}
fn visit_namespace(state: &mut ExtractionState, node: TsNode<'_>) {
let name = node.child_by_field_name("name").map(|n| state.node_text(n));
let pushed = if let Some(name) = name.as_deref() {
let id = Self::emit_node(state, node, NodeKind::Module, name);
let parent_qn = match state.scope_stack.last() {
Some((qn, _)) => qn.clone(),
None => state.file_path.clone(),
};
state.scope_stack.push((format!("{parent_qn}::{name}"), id));
true
} else {
false
};
let mut cursor = node.walk();
if cursor.goto_first_child() {
loop {
let child = cursor.node();
let field = cursor.field_name();
if field != Some("name") {
Self::visit(state, child);
}
if !cursor.goto_next_sibling() {
break;
}
}
}
if pushed {
state.scope_stack.pop();
}
}
fn visit_import(state: &mut ExtractionState, node: TsNode<'_>) {
if let Some(n) = node.child_by_field_name("module") {
let target = state.node_text(n);
Self::push_use_edge(state, node, &target);
return;
}
let mut cursor = node.walk();
if cursor.goto_first_child() {
loop {
if cursor.node().kind() == "identifier" {
let target = state.node_text(cursor.node());
Self::push_use_edge(state, node, &target);
break;
}
if !cursor.goto_next_sibling() {
break;
}
}
}
}
fn push_use_edge(state: &mut ExtractionState, node: TsNode<'_>, target_path: &str) {
let target_id = generate_node_id(target_path, &NodeKind::File, target_path, 0);
let parent_id = match state.scope_stack.last() {
Some((_, id)) => id.clone(),
None => state.file_node_id.clone(),
};
state.edges.push(Edge {
source: parent_id,
target: target_id,
kind: EdgeKind::Uses,
line: Some(node.start_position().row as u32),
});
}
fn emit_named(state: &mut ExtractionState, node: TsNode<'_>, kind: NodeKind) -> String {
let name = match node.child_by_field_name("name") {
Some(n) => state.node_text(n),
None => format!("<anonymous_{}>", node.kind()),
};
Self::emit_node(state, node, kind, &name)
}
fn emit_if_named(state: &mut ExtractionState, node: TsNode<'_>, kind: NodeKind) {
if let Some(name_node) = node.child_by_field_name("name") {
let name = state.node_text(name_node);
Self::emit_node(state, node, kind, &name);
}
}
fn emit_node(
state: &mut ExtractionState,
node: TsNode<'_>,
kind: NodeKind,
name: &str,
) -> String {
let start_line = node.start_position().row as u32;
let end_line = node.end_position().row as u32;
let parent_qn = match state.scope_stack.last() {
Some((qn, _)) => qn.clone(),
None => state.file_path.clone(),
};
let qualified_name = format!("{parent_qn}::{name}");
let id = generate_node_id(&state.file_path, &kind, name, start_line);
let signature = state
.node_text(node)
.lines()
.next()
.map(|l| l.trim().to_string());
let new_node = Node {
id: id.clone(),
kind,
name: name.to_string(),
qualified_name,
file_path: state.file_path.clone(),
start_line,
attrs_start_line: start_line,
end_line,
start_column: node.start_position().column as u32,
end_column: node.end_position().column as u32,
signature,
docstring: None,
visibility: Visibility::Pub,
is_async: false,
branches: 0,
loops: 0,
returns: 0,
max_nesting: 0,
unsafe_blocks: 0,
unchecked_calls: 0,
assertions: 0,
updated_at: state.timestamp,
};
state.nodes.push(new_node);
if let Some((_, parent_id)) = state.scope_stack.last() {
state.edges.push(Edge {
source: parent_id.clone(),
target: id.clone(),
kind: EdgeKind::Contains,
line: Some(start_line),
});
}
id
}
}
impl crate::extraction::LanguageExtractor for LeanExtractor {
fn extensions(&self) -> &[&str] {
&["lean"]
}
fn language_name(&self) -> &'static str {
"Lean"
}
fn extract(&self, file_path: &str, source: &str) -> ExtractionResult {
Self::extract_lean(file_path, source)
}
}