#![allow(dead_code)]
use tree_sitter::{Language as TsLanguage, Node, Query, QueryCursor, StreamingIterator};
use crate::error::{CodegraphError, Result};
use crate::graph::types::{
Binding, BindingKind, BindingTarget, ByteSpan, Occurrence, RefRole, Reference, Scope, ScopeId,
ScopeKind, Symbol, SymbolKind, TypeRefContext, Visibility,
};
use crate::lang::Language;
use crate::symbol::{Descriptor, SymbolId};
/// Per-file inputs shared by the symbol-building helpers (see `make_symbol`).
pub(crate) struct ExtractCtx<'a> {
/// Source bytes of the file; the unit test passes the same buffer that was parsed.
pub bytes: &'a [u8],
/// File path; `make_symbol` copies it into the `file` field of each symbol.
pub file: &'a str,
/// Language; `make_symbol` passes its `as_str()` value to `SymbolId::global`.
pub lang: Language,
}
/// Assembles a [`Symbol`] for a definition located at `span_node`.
///
/// * The id is built by `SymbolId::global` from the context language string
///   and `descriptors`.
/// * `file` is copied from `ctx`.
/// * `line` is the node's start row plus one, i.e. 1-based.
/// * `span` is the node's start/end byte range.
/// * `entry_points` starts out empty.
pub(crate) fn make_symbol(
    ctx: &ExtractCtx,
    span_node: &Node,
    name: String,
    kind: SymbolKind,
    visibility: Visibility,
    descriptors: Vec<Descriptor>,
    signature: String,
) -> Symbol {
    let id = SymbolId::global(ctx.lang.as_str(), descriptors);
    // tree-sitter rows are 0-based; stored lines are 1-based.
    let line = (span_node.start_position().row + 1) as u32;
    let span = ByteSpan {
        start: span_node.start_byte(),
        end: span_node.end_byte(),
    };
    let file = ctx.file.to_owned();
    Symbol {
        id,
        name,
        kind,
        visibility,
        entry_points: Vec::new(),
        file,
        line,
        span,
        signature,
    }
}
/// Returns the slice of `bytes` covered by `node` as a string.
///
/// If that byte range is not valid UTF-8 the placeholder `"<invalid utf8>"`
/// is returned instead. Indexing panics if the node's byte range lies outside
/// `bytes`.
pub(crate) fn node_text<'a>(node: &Node, bytes: &'a [u8]) -> &'a str {
    let raw = &bytes[node.start_byte()..node.end_byte()];
    match std::str::from_utf8(raw) {
        Ok(text) => text,
        Err(_) => "<invalid utf8>",
    }
}
/// Condenses the head of a definition into a single-line signature.
///
/// `text` is scanned left to right while tracking the nesting depth of
/// `{`/`(`/`[` against `}`/`)`/`]`. The signature ends just before the first
/// character from `stop` seen at depth zero. When no such character exists,
/// the first line of `text` is used. Runs of whitespace in the result are
/// collapsed to single spaces and the ends are trimmed.
pub(crate) fn one_line_signature(text: &str, stop: &[char]) -> String {
    let mut nesting = 0i32;
    let cut = text.char_indices().find_map(|(offset, ch)| {
        // The stop test runs before the depth update, so an opening bracket
        // listed in `stop` terminates the signature at depth zero.
        if nesting == 0 && stop.contains(&ch) {
            return Some(offset);
        }
        if matches!(ch, '{' | '(' | '[') {
            nesting += 1;
        } else if matches!(ch, '}' | ')' | ']') {
            nesting -= 1;
        }
        None
    });
    let head = match cut {
        Some(offset) => &text[..offset],
        None => text.lines().next().unwrap_or(text),
    };
    let words: Vec<&str> = head.split_whitespace().collect();
    words.join(" ")
}
/// Minimum length, in bytes, a callee name must have to be recorded by
/// `collect_call_references`; shorter names are skipped there.
pub(crate) const MIN_REF_LEN: usize = 3;
/// Returns the text of the first direct child of `node` whose kind equals
/// `kind`, or `None` when no child has that kind.
pub(crate) fn child_text(node: &Node, kind: &str, bytes: &[u8]) -> Option<String> {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        if child.kind() == kind {
            return Some(node_text(&child, bytes).to_owned());
        }
    }
    None
}
/// Returns the text of the child stored under the grammar field `field`, or
/// `None` when `node` has no such field.
pub(crate) fn field_text(node: &Node, field: &str, bytes: &[u8]) -> Option<String> {
    let child = node.child_by_field_name(field)?;
    Some(node_text(&child, bytes).to_owned())
}
/// Picks the display name of a file's module.
///
/// When `namespaces` is non-empty its last element is returned. Otherwise the
/// name is derived from `file`: the final path component, cut at its first
/// `.`. Both `/` and `\` are treated as path separators, so Windows-style
/// paths such as `src\util.rs` yield `util` rather than `src\util`.
///
/// The result can be empty (e.g. for a file name that starts with `.`).
pub(crate) fn module_name(namespaces: &[String], file: &str) -> String {
    if let Some(leaf) = namespaces.last() {
        return leaf.clone();
    }
    // `rsplit` always yields at least one item, so the fallback is only defensive.
    let component = file
        .rsplit(|c: char| c == '/' || c == '\\')
        .next()
        .unwrap_or(file);
    component
        .split('.')
        .next()
        .unwrap_or(component)
        .to_owned()
}
/// Builds the `Module` symbol that represents a whole file.
///
/// The descriptor chain is one `Descriptor::Namespace` per entry of
/// `namespaces`. When `namespaces` is empty, a single namespace descriptor
/// holding the name from `module_name` is used instead, unless that name is
/// empty. The symbol is public, sits on line 1, spans bytes `0..source_len`,
/// and has an empty signature.
pub(crate) fn module_symbol(
    lang: Language,
    namespaces: &[String],
    file: &str,
    source_len: usize,
) -> Symbol {
    let name = module_name(namespaces, file);
    let descriptors: Vec<Descriptor> = if !namespaces.is_empty() {
        namespaces
            .iter()
            .map(|ns| Descriptor::Namespace(ns.clone()))
            .collect()
    } else if name.is_empty() {
        Vec::new()
    } else {
        vec![Descriptor::Namespace(name.clone())]
    };
    let whole_file = ByteSpan {
        start: 0,
        end: source_len,
    };
    Symbol {
        id: SymbolId::global(lang.as_str(), descriptors),
        name,
        kind: SymbolKind::Module,
        visibility: Visibility::Public,
        entry_points: Vec::new(),
        file: file.to_owned(),
        line: 1,
        span: whole_file,
        signature: String::new(),
    }
}
/// Reduces a possibly qualified, possibly generic type expression to its
/// simple name.
///
/// Everything from the first `<` onwards is dropped, then only the part after
/// the last occurrence of `sep` is kept, and the result is trimmed.
pub(crate) fn simple_type_name<'a>(text: &'a str, sep: &str) -> &'a str {
    let without_generics = match text.find('<') {
        Some(at) => &text[..at],
        None => text,
    };
    let leaf = match without_generics.rfind(sep) {
        Some(at) => &without_generics[at + sep.len()..],
        None => without_generics,
    };
    leaf.trim()
}
/// Records where `node` starts: the file, the 1-based line, the column as
/// reported by tree-sitter, and the start byte offset.
#[inline]
pub(crate) fn node_occurrence(node: &Node, file: &str) -> Occurrence {
    let start = node.start_position();
    let line = (start.row + 1) as u32;
    let col = start.column as u32;
    Occurrence {
        file: file.to_owned(),
        line,
        col,
        byte: node.start_byte(),
    }
}
/// Appends a reference to `name` with the given `role`, located at the start
/// of `node`. Nothing is appended for an empty name. All optional fields of
/// the reference are left as `None`.
pub(crate) fn push_ref(
    out: &mut Vec<Reference>,
    name: &str,
    node: &Node,
    file: &str,
    role: RefRole,
) {
    if !name.is_empty() {
        let reference = Reference {
            name: name.to_owned(),
            occ: node_occurrence(node, file),
            role,
            source_module: None,
            from_path: None,
            qualifier: None,
            scope: None,
            type_ref_ctx: None,
        };
        out.push(reference);
    }
}
/// Appends an `Import` reference to `name`, located at the start of `node`.
///
/// `module_id` is always stored as `source_module`. `from_path` is stored only
/// when it is non-empty. Nothing is appended for an empty name.
pub(crate) fn push_import_ref(
    out: &mut Vec<Reference>,
    name: &str,
    node: &Node,
    file: &str,
    module_id: &str,
    from_path: &str,
) {
    if !name.is_empty() {
        let origin = match from_path {
            "" => None,
            path => Some(path.to_owned()),
        };
        let reference = Reference {
            name: name.to_owned(),
            occ: node_occurrence(node, file),
            role: RefRole::Import,
            source_module: Some(module_id.to_owned()),
            from_path: origin,
            qualifier: None,
            scope: None,
            type_ref_ctx: None,
        };
        out.push(reference);
    }
}
/// Appends a `TypeRef` reference to `name`, located at the start of `node`,
/// carrying `ctx` as its type-reference context. Nothing is appended for an
/// empty name.
pub(crate) fn push_type_ref(
    out: &mut Vec<Reference>,
    name: &str,
    node: &Node,
    file: &str,
    ctx: TypeRefContext,
) {
    if !name.is_empty() {
        let reference = Reference {
            name: name.to_owned(),
            occ: node_occurrence(node, file),
            role: RefRole::TypeRef,
            source_module: None,
            from_path: None,
            qualifier: None,
            scope: None,
            type_ref_ctx: Some(ctx),
        };
        out.push(reference);
    }
}
/// Strips one pair of matching surrounding quotes from `text`.
///
/// Recognised quote characters are the double quote and the backtick. The
/// opening and closing character must be the same. Any other input, including
/// a lone quote character, is returned unchanged.
pub(crate) fn unquote(text: &str) -> &str {
    ['"', '`']
        .iter()
        .find_map(|&quote| text.strip_prefix(quote)?.strip_suffix(quote))
        .unwrap_or(text)
}
/// Reports whether `node` has a direct child of kind
/// `storage_class_specifier` whose text is exactly `static`.
pub(crate) fn is_static(node: &Node, bytes: &[u8]) -> bool {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        if child.kind() == "storage_class_specifier" && node_text(&child, bytes) == "static" {
            return true;
        }
    }
    false
}
/// Runs the tree-sitter query `query_src` over `root` and turns every
/// `@callee` capture into a `Call` reference.
///
/// An optional `@qualifier` capture is supported: for each match, the text of
/// the first qualifier capture (if any) is attached to every callee of that
/// match. Callee names shorter than `MIN_REF_LEN` bytes are skipped.
///
/// # Errors
///
/// Returns `CodegraphError::Query` when the query fails to compile or does
/// not declare a `@callee` capture.
pub(crate) fn collect_call_references(
    root: &Node,
    ts_lang: &TsLanguage,
    query_src: &str,
    lang: Language,
    bytes: &[u8],
    file: &str,
) -> Result<Vec<Reference>> {
    // Both failure modes report the same error variant, tagged with the language.
    let query_error = |msg: String| CodegraphError::Query {
        lang: lang.as_str().to_owned(),
        msg,
    };

    let query = Query::new(ts_lang, query_src).map_err(|e| query_error(e.to_string()))?;
    let callee_idx = query
        .capture_index_for_name("callee")
        .ok_or_else(|| query_error("missing @callee capture".to_owned()))?;
    let qualifier_idx = query.capture_index_for_name("qualifier");

    let mut cursor = QueryCursor::new();
    let mut matches = cursor.matches(&query, *root, bytes);
    let mut refs = Vec::new();
    // `matches` is a streaming iterator, so it is driven manually with `next()`.
    while let Some(m) = matches.next() {
        let qualifier: Option<String> = match qualifier_idx {
            Some(qi) => m
                .captures
                .iter()
                .find(|cap| cap.index == qi)
                .map(|cap| node_text(&cap.node, bytes).to_owned()),
            None => None,
        };
        for cap in m.captures.iter() {
            if cap.index != callee_idx {
                continue;
            }
            let callee = node_text(&cap.node, bytes);
            if callee.len() < MIN_REF_LEN {
                continue;
            }
            refs.push(Reference {
                name: callee.to_owned(),
                occ: node_occurrence(&cap.node, file),
                role: RefRole::Call,
                source_module: None,
                from_path: None,
                qualifier: qualifier.clone(),
                scope: None,
                type_ref_ctx: None,
            });
        }
    }
    Ok(refs)
}
/// Returns the byte range covered by `node`.
pub(crate) fn node_span(node: &Node) -> ByteSpan {
    let (start, end) = (node.start_byte(), node.end_byte());
    ByteSpan { start, end }
}
/// Appends a new scope to `scopes` and returns its id, which is the index the
/// scope occupies in the vector.
pub(crate) fn push_scope(
    scopes: &mut Vec<Scope>,
    parent: Option<ScopeId>,
    span: ByteSpan,
    kind: ScopeKind,
) -> ScopeId {
    let scope = Scope { parent, span, kind };
    scopes.push(scope);
    scopes.len() - 1
}
/// Finds the scope with the shortest span that contains `byte`.
///
/// When several containing scopes have the same span length, the one with the
/// highest id wins. Returns `None` when no scope contains `byte`.
pub(crate) fn innermost_scope(byte: usize, scopes: &[Scope]) -> Option<ScopeId> {
    scopes
        .iter()
        .enumerate()
        .filter_map(|(id, scope)| {
            if scope.span.contains(byte) {
                // `Reverse` makes the larger id compare as smaller on ties.
                Some((scope.span.len(), std::cmp::Reverse(id)))
            } else {
                None
            }
        })
        .min()
        .map(|(_, std::cmp::Reverse(id))| id)
}
/// Sets the `scope` of every reference to the innermost scope containing the
/// reference's byte offset (`None` when no scope contains it).
pub(crate) fn attach_reference_scopes(refs: &mut [Reference], scopes: &[Scope]) {
    refs.iter_mut().for_each(|reference| {
        reference.scope = innermost_scope(reference.occ.byte, scopes);
    });
}
/// Appends a `Local`-target binding for `name` introduced at byte `intro`.
///
/// The binding is placed in the innermost scope containing `intro`; scope id
/// `0` is used when no scope contains it.
#[inline]
pub(crate) fn push_binding(
    out: &mut Vec<Binding>,
    name: String,
    intro: usize,
    kind: BindingKind,
    scopes: &[Scope],
) {
    let scope = match innermost_scope(intro, scopes) {
        Some(id) => id,
        None => 0,
    };
    let binding = Binding {
        scope,
        name,
        intro,
        kind,
        target: BindingTarget::Local,
    };
    out.push(binding);
}
/// Creates one `Definition` binding per symbol in `defs`.
///
/// Every binding is placed in scope `0`, is introduced at the symbol's span
/// start, and targets the symbol's id.
pub(crate) fn definition_bindings(defs: &[Symbol]) -> Vec<Binding> {
    let mut bindings = Vec::with_capacity(defs.len());
    for def in defs {
        bindings.push(Binding {
            scope: 0,
            name: def.name.clone(),
            intro: def.span.start,
            kind: BindingKind::Definition,
            target: BindingTarget::Def(def.id.clone()),
        });
    }
    bindings
}
/// Creates one `Import` binding for every reference in `refs` whose role is
/// `RefRole::Import`; other references are ignored.
///
/// Each binding is placed in the innermost scope containing the reference
/// (scope `0` when none does) and targets the reference's `from_path`, or the
/// default (empty) path when that is `None`.
pub(crate) fn import_bindings(refs: &[Reference], scopes: &[Scope]) -> Vec<Binding> {
    let mut bindings = Vec::new();
    for reference in refs {
        if reference.role != RefRole::Import {
            continue;
        }
        let at = reference.occ.byte;
        let path = reference.from_path.clone().unwrap_or_default();
        bindings.push(Binding {
            scope: innermost_scope(at, scopes).unwrap_or(0),
            name: reference.name.clone(),
            intro: at,
            kind: BindingKind::Import,
            target: BindingTarget::Import(path),
        });
    }
    bindings
}
/// Converts a byte offset into a `(line, column)` pair.
///
/// The line is 1-based: one plus the number of `\n` bytes before the offset.
/// The column is 0-based: the number of bytes between the preceding `\n` (or
/// the start of the buffer) and the offset. Offsets past the end of `bytes`
/// are clamped to its length.
pub(crate) fn byte_to_line_col(bytes: &[u8], byte: usize) -> (u32, u32) {
    let upto = &bytes[..byte.min(bytes.len())];
    let newlines = upto.iter().filter(|&&b| b == b'\n').count();
    let col = match upto.iter().rposition(|&b| b == b'\n') {
        Some(last_newline) => upto.len() - last_newline - 1,
        None => upto.len(),
    };
    (1 + newlines as u32, col as u32)
}
/// Rebases `facts` by `delta` bytes and re-labels them with `file` and `lang`.
///
/// * Symbol spans, scope spans, reference byte offsets and binding intro
///   offsets are all shifted by `delta`.
/// * Symbol lines and reference line/column values are recomputed from the
///   shifted offsets against `embedding_bytes`.
/// * The file name is overwritten on the facts, on every symbol and on every
///   reference occurrence.
pub(crate) fn shift_offsets(
    facts: &mut crate::graph::types::FileFacts,
    delta: usize,
    file: &str,
    lang: &str,
    embedding_bytes: &[u8],
) {
    facts.file = file.to_owned();
    facts.lang = lang.to_owned();

    for sym in &mut facts.symbols {
        let shifted_start = sym.span.start + delta;
        sym.span.start = shifted_start;
        sym.span.end += delta;
        sym.line = byte_to_line_col(embedding_bytes, shifted_start).0;
        sym.file = file.to_owned();
    }

    for scope in &mut facts.scopes {
        let span = &mut scope.span;
        span.start += delta;
        span.end += delta;
    }

    for reference in &mut facts.references {
        let occ = &mut reference.occ;
        occ.byte += delta;
        let (line, col) = byte_to_line_col(embedding_bytes, occ.byte);
        occ.line = line;
        occ.col = col;
        occ.file = file.to_owned();
    }

    for binding in &mut facts.bindings {
        binding.intro += delta;
    }
}
#[cfg(test)]
mod tests {
    use super::unquote;

    /// A pair of surrounding double quotes is stripped.
    #[test]
    fn unquote_removes_double_quotes() {
        let stripped = unquote("\"my table\"");
        assert_eq!(stripped, "my table");
    }

    /// A pair of surrounding backticks is stripped.
    #[test]
    fn unquote_removes_backticks() {
        let stripped = unquote("`my_table`");
        assert_eq!(stripped, "my_table");
    }

    /// Unquoted and empty inputs come back untouched.
    #[test]
    fn unquote_bare_and_empty_unchanged() {
        for &plain in &["users", ""] {
            assert_eq!(unquote(plain), plain);
        }
    }

    /// `make_symbol` copies the context and node data into the symbol.
    #[cfg(feature = "rust")]
    #[test]
    fn make_symbol_from_extract_ctx() {
        use super::{make_symbol, ExtractCtx};
        use crate::graph::types::{SymbolKind, Visibility};
        use crate::lang::Language;
        use crate::symbol::Descriptor;
        use tree_sitter::Parser;

        let src = b"fn f() {}";
        let grammar = crate::grammar::rust();
        let mut parser = Parser::new();
        parser.set_language(&grammar).unwrap();
        let tree = parser.parse(src, None).unwrap();
        let fn_node = tree.root_node().named_child(0).unwrap();
        assert_eq!(fn_node.kind(), "function_item");

        let ctx = ExtractCtx {
            bytes: src,
            file: "src/lib.rs",
            lang: Language::Rust,
        };
        let descriptors = vec![Descriptor::Term("f".to_owned())];
        let sym = make_symbol(
            &ctx,
            &fn_node,
            "f".to_owned(),
            SymbolKind::Function,
            Visibility::Private,
            descriptors,
            "fn f()".to_owned(),
        );

        assert_eq!(sym.file, "src/lib.rs");
        assert_eq!(sym.name, "f");
        assert_eq!(sym.kind, SymbolKind::Function);
        assert_eq!(sym.visibility, Visibility::Private);
        assert_eq!(sym.signature, "fn f()");
        assert_eq!(sym.line, 1, "first line is 1-based");
        assert_eq!(sym.span.start, fn_node.start_byte());
        assert_eq!(sym.span.end, fn_node.end_byte());
    }

    /// Extracting a Rust file yields exactly one module symbol named after the file stem.
    #[cfg(feature = "rust")]
    #[test]
    fn emits_module_symbol() {
        use crate::extract::Extractor as _;
        use crate::extract::RustExtractor;
        use crate::graph::types::SymbolKind;

        let facts = RustExtractor
            .extract("pub fn f() {}", "src/util.rs")
            .unwrap();
        let mut modules = Vec::new();
        for sym in facts.symbols.iter() {
            if sym.kind == SymbolKind::Module {
                modules.push(sym);
            }
        }
        assert_eq!(modules.len(), 1, "expected exactly one Module symbol");
        assert_eq!(
            modules[0].name, "util",
            "module name should be the file stem"
        );
    }
}