mod entities;
mod relations;
pub use entities::extract_entities;
pub use relations::{extract_relations, extract_relations_with_custom_edges};
use anyhow::Result;
use sha2::{Digest, Sha256};
use crate::analysis::extract_statements;
use crate::lang::{LanguagePack, ParserBackend};
use crate::model::{FileExtraction, Relation, RelationKind, Span, Statement, SymbolKind};
// One tree-sitter parser per thread, kept behind a `RefCell` so that
// `extract_file` can borrow it mutably and switch its language on each call
// instead of constructing a new parser every time.
thread_local! {
static TS_PARSER: std::cell::RefCell<tree_sitter::Parser> = std::cell::RefCell::new(tree_sitter::Parser::new());
}
/// Extracts symbols, relations and statements from a single source file.
///
/// The backend configured on `pack` decides how the file is processed:
///
/// * `ParserBackend::TreeSitter` parses `source` with the thread-local parser
///   (after switching it to the pack's grammar), then runs entity, relation and
///   statement extraction on the resulting tree. The custom-edge aware relation
///   extractor is used whenever `pack.custom_edges` is non-empty.
/// * `ParserBackend::Custom` delegates to the pack's extractor, which yields
///   symbols and relations only; the statement list is left empty.
///
/// Afterwards route-to-handler edges are appended to the relations and the
/// SHA-256 of `source` is stored as a lowercase hex string.
///
/// # Errors
///
/// Fails when the grammar cannot be installed on the parser, when the parser
/// returns no tree (`"failed to parse <path>"`), or when a custom extractor
/// reports an error.
pub fn extract_file(path: &str, source: &[u8], pack: &LanguagePack) -> Result<FileExtraction> {
    let (symbols, mut relations, statements) = match &pack.backend {
        ParserBackend::Custom(extractor) => {
            let (custom_symbols, custom_relations) =
                extractor.extract(path, source, &pack.name)?;
            (custom_symbols, custom_relations, Vec::new())
        }
        ParserBackend::TreeSitter {
            grammar,
            entity_query,
            relation_query,
        } => TS_PARSER.with(|parser_cell| -> Result<_> {
            let mut ts_parser = parser_cell.borrow_mut();
            ts_parser.set_language(grammar)?;

            let tree = match ts_parser.parse(source, None) {
                Some(parsed) => parsed,
                None => return Err(anyhow::anyhow!("failed to parse {}", path)),
            };
            let root = tree.root_node();

            let found_symbols = extract_entities(path, source, root, entity_query, &pack.name);
            let found_relations = if pack.custom_edges.is_empty() {
                extract_relations(path, source, root, relation_query)
            } else {
                extract_relations_with_custom_edges(
                    path,
                    source,
                    root,
                    relation_query,
                    &pack.custom_edges,
                )
            };
            let found_statements = extract_statements_for_symbols(root, source, &found_symbols);

            Ok((found_symbols, found_relations, found_statements))
        })?,
    };

    generate_route_handler_edges(path, &symbols, &mut relations);

    // One-shot digest of the raw bytes, rendered as lowercase hex.
    let content_hash = format!("{:x}", Sha256::digest(source));

    Ok(FileExtraction {
        file: path.to_string(),
        language: pack.name.clone(),
        content_hash,
        symbols,
        relations,
        statements,
    })
}
/// Collects the statements belonging to every function-like symbol in a file.
///
/// Only `Function`, `Method` and `Test` symbols are considered. Their ids and
/// line ranges are handed to `collect_fn_nodes`, which walks the tree under
/// `root`. The collected statements are then de-duplicated by `id`, keeping the
/// first occurrence and preserving order.
///
/// Returns an empty vector without walking the tree when `symbols` contains no
/// function-like symbol.
fn extract_statements_for_symbols(
    root: tree_sitter::Node<'_>,
    source: &[u8],
    symbols: &[crate::model::Symbol],
) -> Vec<Statement> {
    // (symbol id, start line, end line) for each function-like symbol.
    let mut callable_ranges: Vec<(&str, u32, u32)> = Vec::new();
    for sym in symbols {
        let is_callable = matches!(
            sym.kind,
            SymbolKind::Function | SymbolKind::Method | SymbolKind::Test
        );
        if is_callable {
            callable_ranges.push((sym.id.as_str(), sym.span.start_line, sym.span.end_line));
        }
    }
    if callable_ranges.is_empty() {
        return Vec::new();
    }

    let mut collected = Vec::new();
    collect_fn_nodes(root, source, &callable_ranges, &mut collected);

    // The same statement id can be collected more than once; keep only the
    // first statement seen for each id.
    let mut seen_ids = std::collections::HashSet::new();
    let mut unique = Vec::with_capacity(collected.len());
    for stmt in collected {
        if seen_ids.insert(stmt.id.clone()) {
            unique.push(stmt);
        }
    }
    unique
}
/// Visits `node` and all of its descendants in pre-order and, for every node
/// whose line range equals that of a known function symbol, appends the
/// statements extracted from that node to `stmts`.
///
/// * `node` - root of the subtree to scan.
/// * `source` - bytes the tree was parsed from.
/// * `fn_symbols` - `(symbol id, start line, end line)` triples; the first
///   triple whose lines match a node supplies the owning symbol id.
/// * `stmts` - output buffer; matches are appended in visiting order.
///
/// The walk is iterative and driven by a `TreeCursor`, so its stack usage does
/// not grow with the depth of the syntax tree, and children are reached by
/// cursor moves rather than by repeated indexed `child(i)` lookups.
fn collect_fn_nodes<'a>(
    node: tree_sitter::Node<'a>,
    source: &'a [u8],
    fn_symbols: &[(&str, u32, u32)],
    stmts: &mut Vec<Statement>,
) {
    // The cursor treats `node` as its root: it cannot move to a sibling or
    // parent of `node`, which is what terminates the walk below.
    let mut cursor = node.walk();
    loop {
        let current = cursor.node();

        // Rows reported by tree-sitter are zero-based; the symbol ranges are
        // compared against row + 1.
        let start = current.start_position().row as u32 + 1;
        let end = current.end_position().row as u32 + 1;
        if let Some((sym_id, _, _)) = fn_symbols
            .iter()
            .find(|(_, sl, el)| start == *sl && end == *el)
        {
            stmts.extend(extract_statements(current, source, sym_id, ""));
        }

        // Pre-order: descend first.
        if cursor.goto_first_child() {
            continue;
        }
        // Leaf reached: move to the next sibling, climbing towards the root
        // until one exists. Running out of parents means the walk is complete.
        loop {
            if cursor.goto_next_sibling() {
                break;
            }
            if !cursor.goto_parent() {
                return;
            }
        }
    }
}
/// Adds a `Calls` relation from each route symbol to the function handling it.
///
/// Candidate handlers are the `Function` and `Method` symbols whose span lies
/// in `file`. For every `Route` symbol the handler is resolved in two steps:
///
/// 1. If the route's docstring contains `handler=`, the trimmed text following
///    its first occurrence (up to the next occurrence, if any) is looked up by
///    name among the candidates.
/// 2. If that yields nothing, the first candidate whose line range overlaps the
///    route's line range is used.
///
/// A relation is pushed onto `relations` only when a handler was found and its
/// id differs from the route's own id. The relation carries the route's
/// line/column span, attributed to `file`, and has no receiver.
fn generate_route_handler_edges(
    file: &str,
    symbols: &[crate::model::Symbol],
    relations: &mut Vec<Relation>,
) {
    // Functions and methods defined in this file, in declaration order.
    let local_callables: Vec<&crate::model::Symbol> = symbols
        .iter()
        .filter(|s| {
            matches!(s.kind, SymbolKind::Function | SymbolKind::Method) && s.span.file == file
        })
        .collect();

    for route in symbols.iter().filter(|s| s.kind == SymbolKind::Route) {
        // Step 1: handler explicitly named in the docstring.
        let named_handler = route
            .docstring
            .as_ref()
            .and_then(|doc| doc.split("handler=").nth(1))
            .map(|raw| raw.trim())
            .and_then(|wanted| {
                local_callables
                    .iter()
                    .find(|candidate| candidate.name.as_str() == wanted)
            })
            .map(|candidate| candidate.id.clone());

        // Step 2: fall back to the first callable overlapping the route's lines.
        let resolved = named_handler.or_else(|| {
            local_callables
                .iter()
                .find(|candidate| {
                    candidate.span.start_line <= route.span.end_line
                        && candidate.span.end_line >= route.span.start_line
                })
                .map(|candidate| candidate.id.clone())
        });

        // Never emit an edge from a symbol to itself.
        if let Some(handler_id) = resolved.filter(|id| *id != route.id) {
            let route_span = Span {
                file: file.to_string(),
                start_line: route.span.start_line,
                start_col: route.span.start_col,
                end_line: route.span.end_line,
                end_col: route.span.end_col,
            };
            relations.push(Relation {
                source_id: route.id.clone(),
                target_id: handler_id,
                kind: RelationKind::Calls,
                span: Some(route_span),
                receiver: None,
            });
        }
    }
}