use tree_sitter::{Node, Parser};
use crate::error::{CodegraphError, Result};
use crate::graph::types::{
Binding, BindingKind, ByteSpan, EntryPoint, FileFacts, RefRole, Reference, Scope, ScopeId,
ScopeKind, Symbol, SymbolKind, TypeRefContext, Visibility,
};
use crate::lang::Language;
use crate::symbol::Descriptor;
use super::{
ExtractCtx, Extractor, MIN_REF_LEN, attach_reference_scopes, collect_call_references,
definition_bindings, field_text, innermost_scope, is_static, make_symbol, node_span, node_text,
one_line_signature, push_binding, push_ref, push_scope, push_type_ref,
};
/// Tree-sitter query handed to `collect_call_references`.
///
/// It captures as `@callee` the name node of a `call_expression` whose
/// `function` is a bare identifier, the field of a `field_expression`, or the
/// `name` of a `qualified_identifier`.
const CALL_QUERY: &str = r#"
[
(call_expression function: (identifier) @callee)
(call_expression function: (field_expression field: (field_identifier) @callee))
(call_expression function: (qualified_identifier name: (identifier) @callee))
]
"#;
/// Unit struct that implements [`Extractor`] for C++ sources.
pub struct CppExtractor;
impl Extractor for CppExtractor {
    /// Reports the language this extractor handles.
    fn lang(&self) -> Language {
        Language::Cpp
    }

    /// Parses `source` with the C++ grammar and gathers symbols, references,
    /// scopes and bindings into a [`FileFacts`] for `file`.
    ///
    /// # Errors
    /// Returns [`CodegraphError::Parse`] carrying `file` when the grammar
    /// cannot be installed on the parser or when parsing yields no tree.
    /// Errors from `collect_call_references` are propagated unchanged.
    fn extract(&self, source: &str, file: &str) -> Result<FileFacts> {
        let parse_failure = || CodegraphError::Parse {
            path: file.to_owned(),
        };

        let grammar = crate::grammar::cpp();
        let mut parser = Parser::new();
        parser.set_language(&grammar).map_err(|_| parse_failure())?;
        let tree = parser.parse(source, None).ok_or_else(parse_failure)?;

        let root = tree.root_node();
        let bytes = source.as_bytes();
        let source_len = source.len();
        let namespaces = cpp_namespaces(file);

        // Symbols: definitions first, then the synthetic module symbol.
        let ctx = ExtractCtx {
            bytes,
            file,
            lang: Language::Cpp,
        };
        let mut symbols = Vec::new();
        collect_defs(&root, &namespaces, &ctx, &mut symbols);
        // Definition bindings are derived before the module symbol is appended,
        // so the module itself contributes no binding.
        let def_bindings = definition_bindings(&symbols);
        symbols.push(super::module_symbol(
            Language::Cpp,
            &namespaces,
            file,
            source_len,
        ));

        let scopes = collect_scopes(&root, source_len);

        // References: calls, then inheritance, reads, writes and type uses.
        let mut references =
            collect_call_references(&root, &grammar, CALL_QUERY, Language::Cpp, bytes, file)?;
        collect_inheritance(&root, bytes, file, &mut references);
        collect_read_references(&root, bytes, file, &mut references);
        collect_write_references(&root, bytes, file, &mut references);
        collect_type_references(&root, bytes, file, &mut references);
        attach_reference_scopes(&mut references, &scopes);

        // Bindings: params/locals from the tree, followed by definition bindings.
        let mut bindings = collect_bindings(&root, bytes, &scopes);
        bindings.extend(def_bindings);

        Ok(FileFacts {
            file: file.to_owned(),
            lang: Language::Cpp.as_str().to_owned(),
            symbols,
            references,
            scopes,
            bindings,
            ffi_exports: Vec::new(),
        })
    }
}
/// Derives namespace segments from a file path.
///
/// The first matching C/C++ extension (in the listed order) is removed, then
/// a leading `src/` is removed, and the remainder is split on `/` with empty
/// segments dropped. A path with no recognised extension is used as-is.
fn cpp_namespaces(file: &str) -> Vec<String> {
    const EXTENSIONS: [&str; 8] = [".cc", ".cpp", ".cxx", ".hh", ".hpp", ".hxx", ".c", ".h"];

    let mut stem = file;
    for ext in EXTENSIONS.iter() {
        if let Some(rest) = file.strip_suffix(*ext) {
            stem = rest;
            break;
        }
    }
    if let Some(rest) = stem.strip_prefix("src/") {
        stem = rest;
    }

    let mut segments = Vec::new();
    for part in stem.split('/') {
        if !part.is_empty() {
            segments.push(part.to_owned());
        }
    }
    segments
}
/// Resolves the name introduced by a declarator node.
///
/// Returns the name together with a flag that is `true` when the name was
/// reached through a `function_declarator`. Qualified identifiers yield only
/// the text after the last `::`. For any other node kind the `declarator`
/// field is followed when present; otherwise the first child that resolves to
/// a name wins.
fn declarator_name(node: &Node, bytes: &[u8]) -> Option<(String, bool)> {
    let kind = node.kind();

    if matches!(
        kind,
        "identifier" | "type_identifier" | "field_identifier" | "destructor_name" | "operator_name"
    ) {
        return Some((node_text(node, bytes).to_owned(), false));
    }

    if kind == "qualified_identifier" {
        let full = node_text(node, bytes);
        let leaf = full.rsplit("::").next().unwrap_or(full);
        return Some((leaf.to_owned(), false));
    }

    let nested = node.child_by_field_name("declarator");

    if kind == "function_declarator" {
        // A function declarator without a resolvable inner name yields nothing.
        let (name, _) = declarator_name(&nested?, bytes)?;
        return Some((name, true));
    }

    if let Some(inner) = nested {
        // The `declarator` field is authoritative: no fallback scan if it fails.
        return declarator_name(&inner, bytes);
    }

    let mut cursor = node.walk();
    let found = node
        .children(&mut cursor)
        .find_map(|child| declarator_name(&child, bytes));
    found
}
/// Extracts the unqualified name of a type node.
///
/// Handles `type_identifier` (its text), `template_type` (the leaf of its
/// `name` field) and `qualified_identifier` (the text after the last `::`);
/// any other kind yields `None`.
fn type_leaf_name(node: &Node, bytes: &[u8]) -> Option<String> {
    let kind = node.kind();
    if kind == "type_identifier" {
        return Some(node_text(node, bytes).to_owned());
    }
    if kind == "template_type" {
        let name = node.child_by_field_name("name")?;
        return type_leaf_name(&name, bytes);
    }
    if kind == "qualified_identifier" {
        return node_text(node, bytes)
            .rsplit("::")
            .next()
            .map(str::to_owned);
    }
    None
}
/// Builds a symbol for `node` and appends it to `out`.
///
/// The symbol's descriptor path is `prefix` followed by `leaf`, its name is
/// the leaf's name, and its signature is the node text passed through
/// `one_line_signature` with `{` and `;` as terminators.
fn push_symbol(
    out: &mut Vec<Symbol>,
    ctx: &ExtractCtx,
    node: &Node,
    prefix: &[Descriptor],
    leaf: Descriptor,
    kind: SymbolKind,
    visibility: Visibility,
) {
    let signature = one_line_signature(node_text(node, ctx.bytes), &['{', ';']);
    let name = leaf.name().to_owned();
    let descriptors: Vec<Descriptor> = prefix
        .iter()
        .cloned()
        .chain(std::iter::once(leaf))
        .collect();
    let symbol = make_symbol(ctx, node, name, kind, visibility, descriptors, signature);
    out.push(symbol);
}
/// Wraps each namespace segment in a [`Descriptor::Namespace`], keeping order.
fn namespace_prefix(namespaces: &[String]) -> Vec<Descriptor> {
    let mut prefix = Vec::with_capacity(namespaces.len());
    for segment in namespaces {
        prefix.push(Descriptor::Namespace(segment.clone()));
    }
    prefix
}
/// Runs [`process_node`] on every direct child of `container`.
fn collect_defs(container: &Node, namespaces: &[String], ctx: &ExtractCtx, out: &mut Vec<Symbol>) {
    let mut cursor = container.walk();
    container
        .children(&mut cursor)
        .for_each(|child| process_node(&child, namespaces, ctx, out));
}
fn process_node(node: &Node, namespaces: &[String], ctx: &ExtractCtx, out: &mut Vec<Symbol>) {
match node.kind() {
"namespace_definition" => {
let mut nested = namespaces.to_vec();
if let Some(name) = node.child_by_field_name("name") {
for seg in node_text(&name, ctx.bytes).split("::") {
if !seg.is_empty() {
nested.push(seg.to_owned());
}
}
}
if let Some(body) = node.child_by_field_name("body") {
collect_defs(&body, &nested, ctx, out);
}
}
"function_definition" => {
let vis = if is_static(node, ctx.bytes) {
Visibility::Private
} else {
Visibility::Public
};
let Some(decl) = node.child_by_field_name("declarator") else {
return;
};
let Some((name, _)) = declarator_name(&decl, ctx.bytes) else {
return;
};
let is_main = name == "main";
let prefix = namespace_prefix(namespaces);
push_symbol(
out,
ctx,
node,
&prefix,
Descriptor::Method {
name,
disambiguator: String::new(),
},
SymbolKind::Function,
vis,
);
if is_main {
if let Some(s) = out.last_mut() {
s.entry_points.push(EntryPoint::Main);
}
}
}
"declaration" => {
let vis = if is_static(node, ctx.bytes) {
Visibility::Private
} else {
Visibility::Public
};
if let Some(spec) = node.child_by_field_name("type") {
emit_aggregate(&spec, namespaces, ctx, out);
}
let prefix = namespace_prefix(namespaces);
let mut cursor = node.walk();
for decl in node.children_by_field_name("declarator", &mut cursor) {
let Some((name, is_function)) = declarator_name(&decl, ctx.bytes) else {
continue;
};
if is_function {
push_symbol(
out,
ctx,
node,
&prefix,
Descriptor::Method {
name,
disambiguator: String::new(),
},
SymbolKind::Function,
vis,
);
} else {
push_symbol(
out,
ctx,
node,
&prefix,
Descriptor::Term(name),
SymbolKind::Static,
vis,
);
}
}
}
"class_specifier" | "struct_specifier" | "union_specifier" | "enum_specifier" => {
emit_aggregate(node, namespaces, ctx, out);
}
"type_definition" => {
if let Some(spec) = node.child_by_field_name("type") {
emit_aggregate(&spec, namespaces, ctx, out);
}
let Some(decl) = node.child_by_field_name("declarator") else {
return;
};
let Some((name, _)) = declarator_name(&decl, ctx.bytes) else {
return;
};
let prefix = namespace_prefix(namespaces);
push_symbol(
out,
ctx,
node,
&prefix,
Descriptor::Type(name),
SymbolKind::TypeAlias,
Visibility::Public,
);
}
"alias_declaration" => {
let Some(name) = field_text(node, "name", ctx.bytes) else {
return;
};
let prefix = namespace_prefix(namespaces);
push_symbol(
out,
ctx,
node,
&prefix,
Descriptor::Type(name),
SymbolKind::TypeAlias,
Visibility::Public,
);
}
"template_declaration" => {
for c in node.children(&mut node.walk()) {
if matches!(
c.kind(),
"function_definition"
| "declaration"
| "alias_declaration"
| "class_specifier"
| "struct_specifier"
| "union_specifier"
) {
process_node(&c, namespaces, ctx, out);
}
}
}
"preproc_def" => {
if let Some(name) = field_text(node, "name", ctx.bytes) {
let prefix = namespace_prefix(namespaces);
push_symbol(
out,
ctx,
node,
&prefix,
Descriptor::Macro(name),
SymbolKind::Const,
Visibility::Public,
);
}
}
"preproc_function_def" => {
if let Some(name) = field_text(node, "name", ctx.bytes) {
let prefix = namespace_prefix(namespaces);
push_symbol(
out,
ctx,
node,
&prefix,
Descriptor::Macro(name),
SymbolKind::Function,
Visibility::Public,
);
}
}
_ => {}
}
}
/// Emits a symbol for a class/struct/union/enum specifier and, for non-enums,
/// its members.
///
/// Specifiers without a `body` or without a resolvable `name` produce nothing.
/// The aggregate itself is always recorded as public; members of a `class`
/// start out private, members of a `struct` or `union` start out public.
fn emit_aggregate(spec: &Node, namespaces: &[String], ctx: &ExtractCtx, out: &mut Vec<Symbol>) {
    let spec_kind = spec.kind();
    let symbol_kind = match spec_kind {
        "class_specifier" => SymbolKind::Class,
        "struct_specifier" | "union_specifier" => SymbolKind::Struct,
        "enum_specifier" => SymbolKind::Enum,
        _ => return,
    };

    let (Some(body), Some(name_node)) = (
        spec.child_by_field_name("body"),
        spec.child_by_field_name("name"),
    ) else {
        return;
    };
    let Some(name) = type_leaf_name(&name_node, ctx.bytes) else {
        return;
    };

    let mut path = namespace_prefix(namespaces);
    push_symbol(
        out,
        ctx,
        spec,
        &path,
        Descriptor::Type(name.clone()),
        symbol_kind,
        Visibility::Public,
    );

    // Enumerators are not recorded as members.
    if spec_kind == "enum_specifier" {
        return;
    }

    let member_default = if spec_kind == "class_specifier" {
        Visibility::Private
    } else {
        Visibility::Public
    };
    path.push(Descriptor::Type(name));
    collect_members(&body, &path, member_default, ctx, out);
}
/// Emits symbols for the members listed in an aggregate `body`.
///
/// `type_prefix` is the descriptor path of the enclosing type and
/// `default_vis` the visibility in force until the first access specifier.
///
/// - `access_specifier` switches the current visibility (`public`,
///   `protected`, anything else is private).
/// - `function_definition` becomes a `Method`.
/// - `field_declaration` yields one symbol per declarator, so `int x, y;`
///   records both `x` and `y`: `Method` for function declarators, `Static`
///   otherwise.
/// - Nested aggregates are forwarded to [`emit_aggregate`].
fn collect_members(
    body: &Node,
    type_prefix: &[Descriptor],
    default_vis: Visibility,
    ctx: &ExtractCtx,
    out: &mut Vec<Symbol>,
) {
    let mut current_vis = default_vis;
    for member in body.children(&mut body.walk()) {
        match member.kind() {
            "access_specifier" => {
                let text = node_text(&member, ctx.bytes);
                current_vis = if text.starts_with("public") {
                    Visibility::Public
                } else if text.starts_with("protected") {
                    Visibility::Protected
                } else {
                    Visibility::Private
                };
            }
            "function_definition" => {
                let Some(decl) = member.child_by_field_name("declarator") else {
                    continue;
                };
                let Some((name, _)) = declarator_name(&decl, ctx.bytes) else {
                    continue;
                };
                push_symbol(
                    out,
                    ctx,
                    &member,
                    type_prefix,
                    Descriptor::Method {
                        name,
                        disambiguator: String::new(),
                    },
                    SymbolKind::Method,
                    current_vis,
                );
            }
            "field_declaration" => {
                // A single field declaration may carry several declarators.
                let mut cursor = member.walk();
                for decl in member.children_by_field_name("declarator", &mut cursor) {
                    let Some((name, is_function)) = declarator_name(&decl, ctx.bytes) else {
                        continue;
                    };
                    let (leaf, kind) = if is_function {
                        (
                            Descriptor::Method {
                                name,
                                disambiguator: String::new(),
                            },
                            SymbolKind::Method,
                        )
                    } else {
                        (Descriptor::Term(name), SymbolKind::Static)
                    };
                    push_symbol(out, ctx, &member, type_prefix, leaf, kind, current_vis);
                }
            }
            "class_specifier" | "struct_specifier" | "union_specifier" | "enum_specifier" => {
                // `emit_aggregate` takes plain names and rebuilds them as
                // namespace descriptors, so the enclosing type segments are
                // passed on by name only.
                let nested_ns: Vec<String> =
                    type_prefix.iter().map(|d| d.name().to_owned()).collect();
                emit_aggregate(&member, &nested_ns, ctx, out);
            }
            _ => {}
        }
    }
}
/// Locates the `function_declarator` at or below `node`.
///
/// The node itself is returned when it already is one. Otherwise the
/// `declarator` field is followed when present (with no fallback if that path
/// fails); without such a field the children are searched in order.
fn find_function_declarator<'tree>(node: &Node<'tree>) -> Option<Node<'tree>> {
    if node.kind() == "function_declarator" {
        return Some(*node);
    }
    match node.child_by_field_name("declarator") {
        Some(inner) => find_function_declarator(&inner),
        None => {
            let mut cursor = node.walk();
            let hit = node
                .children(&mut cursor)
                .find_map(|child| find_function_declarator(&child));
            hit
        }
    }
}
fn collect_scopes(root: &Node, source_len: usize) -> Vec<Scope> {
let mut scopes = Vec::new();
push_scope(
&mut scopes,
None,
ByteSpan {
start: 0,
end: source_len,
},
ScopeKind::Module,
);
for child in root.children(&mut root.walk()) {
scope_dfs(&child, 0, &mut scopes);
}
scopes
}
/// Depth-first walk that opens scopes under `parent_id`.
///
/// - Namespaces, classes, structs and unions open a `Type` scope; the walk
///   continues through the `body` children, or the node's own children when
///   there is no body.
/// - Function definitions and lambdas open a `Function` scope; only the
///   children of their `body` are visited.
/// - Compound statements open a `Block` scope.
/// - Any other node opens nothing and passes `parent_id` to its children.
fn scope_dfs(node: &Node, parent_id: ScopeId, scopes: &mut Vec<Scope>) {
    // Decide which scope the children belong to and whose children to visit.
    let (scope_id, walk_from) = match node.kind() {
        "namespace_definition" | "class_specifier" | "struct_specifier" | "union_specifier" => {
            let id = push_scope(scopes, Some(parent_id), node_span(node), ScopeKind::Type);
            (id, Some(node.child_by_field_name("body").unwrap_or(*node)))
        }
        "function_definition" | "lambda_expression" => {
            let id = push_scope(
                scopes,
                Some(parent_id),
                node_span(node),
                ScopeKind::Function,
            );
            (id, node.child_by_field_name("body"))
        }
        "compound_statement" => {
            let id = push_scope(scopes, Some(parent_id), node_span(node), ScopeKind::Block);
            (id, Some(*node))
        }
        _ => (parent_id, Some(*node)),
    };

    if let Some(container) = walk_from {
        for child in container.children(&mut container.walk()) {
            scope_dfs(&child, scope_id, scopes);
        }
    }
}
/// Gathers the bindings that [`collect_bindings_dfs`] finds below `root`.
fn collect_bindings(root: &Node, bytes: &[u8], scopes: &[Scope]) -> Vec<Binding> {
    let mut bindings: Vec<Binding> = Vec::new();
    collect_bindings_dfs(root, bytes, scopes, &mut bindings);
    bindings
}
/// Depth-first walk that records parameter and local bindings.
///
/// - `function_definition`: binds the parameters of the function declarator
///   found under the `declarator` field.
/// - `lambda_expression`: binds the parameters of its `declarator`.
/// - `declaration`: each `declarator` child becomes a `Local` when the
///   innermost scope at its start byte is a function or block.
/// - `for_range_loop`: the loop `declarator` is treated the same way.
///
/// Every node's children are visited regardless of kind.
fn collect_bindings_dfs(node: &Node, bytes: &[u8], scopes: &[Scope], out: &mut Vec<Binding>) {
    /// Records `declarator` as a `Local` if it sits in a function or block scope.
    fn bind_local(declarator: &Node, bytes: &[u8], scopes: &[Scope], out: &mut Vec<Binding>) {
        let Some((name, _)) = declarator_name(declarator, bytes) else {
            return;
        };
        let intro = declarator.start_byte();
        let Some(sid) = innermost_scope(intro, scopes) else {
            return;
        };
        // Declarations at module or type level are not locals.
        if matches!(scopes[sid].kind, ScopeKind::Function | ScopeKind::Block) {
            push_binding(out, name, intro, BindingKind::Local, scopes);
        }
    }

    match node.kind() {
        "function_definition" => {
            // The function declarator may be wrapped (e.g. by a pointer declarator).
            let params = node
                .child_by_field_name("declarator")
                .and_then(|decl| find_function_declarator(&decl))
                .and_then(|fn_decl| fn_decl.child_by_field_name("parameters"));
            if let Some(params) = params {
                collect_params(&params, bytes, scopes, out);
            }
        }
        "lambda_expression" => {
            let params = node
                .child_by_field_name("declarator")
                .and_then(|fn_decl| fn_decl.child_by_field_name("parameters"));
            if let Some(params) = params {
                collect_params(&params, bytes, scopes, out);
            }
        }
        "declaration" => {
            // Binding and recursion are interleaved per child so that output
            // order follows source order.
            let mut cursor = node.walk();
            for (i, child) in node.children(&mut cursor).enumerate() {
                if node.field_name_for_child(i as u32) == Some("declarator") {
                    bind_local(&child, bytes, scopes, out);
                }
                collect_bindings_dfs(&child, bytes, scopes, out);
            }
            return;
        }
        "for_range_loop" => {
            if let Some(decl) = node.child_by_field_name("declarator") {
                bind_local(&decl, bytes, scopes, out);
            }
        }
        _ => {}
    }

    for child in node.children(&mut node.walk()) {
        collect_bindings_dfs(&child, bytes, scopes, out);
    }
}
/// Records a `Param` binding for every named parameter in `params`.
///
/// Only `parameter_declaration`, `optional_parameter_declaration` and
/// `variadic_parameter_declaration` children are considered; parameters with
/// no `declarator` or no resolvable name are skipped. The binding is
/// introduced at the declarator's start byte.
fn collect_params(params: &Node, bytes: &[u8], scopes: &[Scope], out: &mut Vec<Binding>) {
    let mut cursor = params.walk();
    let declared = params.children(&mut cursor).filter(|param| {
        matches!(
            param.kind(),
            "parameter_declaration"
                | "optional_parameter_declaration"
                | "variadic_parameter_declaration"
        )
    });

    for param in declared {
        let named = param.child_by_field_name("declarator").and_then(|decl| {
            declarator_name(&decl, bytes).map(|(name, _)| (name, decl.start_byte()))
        });
        if let Some((name, intro)) = named {
            push_binding(out, name, intro, BindingKind::Param, scopes);
        }
    }
}
/// Reports whether `node` occupies a position that must not count as a read.
///
/// A node without a parent is never a read. Otherwise the node is excluded
/// when it is:
/// - the `function` of a call expression,
/// - the `name` of a qualified identifier,
/// - a `declarator` of a declarator wrapper, a parameter declaration, a plain
///   `declaration` (e.g. `int count;`), a range-for loop or a condition
///   declaration,
/// - the `field` of a field expression,
/// - the `left` side of an assignment.
fn is_non_read_position(node: &Node) -> bool {
    let Some(parent) = node.parent() else {
        return true;
    };

    // Field of the parent that marks a non-read slot for this parent kind.
    let excluded_field = match parent.kind() {
        "call_expression" => "function",
        "qualified_identifier" => "name",
        "function_declarator"
        | "pointer_declarator"
        | "reference_declarator"
        | "array_declarator"
        | "init_declarator"
        | "attributed_declarator"
        | "parameter_declaration"
        | "optional_parameter_declaration"
        | "variadic_parameter_declaration"
        | "declaration"
        | "for_range_loop"
        | "condition_declaration" => "declarator",
        "field_expression" => "field",
        "assignment_expression" => "left",
        _ => return false,
    };

    // A `declaration` can hold several declarators (`int a, b;`), so every
    // child carrying the field is checked, not just the first.
    let mut cursor = parent.walk();
    let occupies_slot = parent
        .children_by_field_name(excluded_field, &mut cursor)
        .any(|candidate| candidate == *node);
    occupies_slot
}
/// Emits a `Read` reference for every `identifier` below `node` whose name is
/// at least `MIN_REF_LEN` bytes long and which is not in a non-read position.
///
/// Identifiers are leaves for this walk: their children are not visited.
fn collect_read_references(node: &Node, bytes: &[u8], file: &str, out: &mut Vec<Reference>) {
    if node.kind() != "identifier" {
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            collect_read_references(&child, bytes, file, out);
        }
        return;
    }

    let name = node_text(node, bytes);
    let long_enough = name.len() >= MIN_REF_LEN;
    if long_enough && !is_non_read_position(node) {
        push_ref(out, name, node, file, RefRole::Read);
    }
}
/// Emits a `Write` reference for every assignment whose `left` side is a bare
/// identifier of at least `MIN_REF_LEN` bytes, then visits all children.
fn collect_write_references(node: &Node, bytes: &[u8], file: &str, out: &mut Vec<Reference>) {
    let target = if node.kind() == "assignment_expression" {
        node.child_by_field_name("left")
            .filter(|lhs| lhs.kind() == "identifier")
    } else {
        None
    };

    if let Some(lhs) = target {
        let name = node_text(&lhs, bytes);
        if name.len() >= MIN_REF_LEN {
            push_ref(out, name, &lhs, file, RefRole::Write);
        }
    }

    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        collect_write_references(&child, bytes, file, out);
    }
}
/// Emits type references for a single type node.
///
/// - `type_identifier`: one reference with context `ctx`.
/// - `qualified_identifier`: delegates to its `name` child; without one, the
///   text after the last `::` is referenced if non-empty.
/// - `template_type`: references the template name with `ctx`, then each type
///   argument with [`TypeRefContext::GenericArg`].
/// - Built-in type kinds and everything else produce nothing.
fn emit_cpp_type_node(
    node: &Node,
    bytes: &[u8],
    file: &str,
    ctx: TypeRefContext,
    out: &mut Vec<Reference>,
) {
    let kind = node.kind();

    if kind == "type_identifier" {
        push_type_ref(out, node_text(node, bytes), node, file, ctx);
        return;
    }

    if kind == "qualified_identifier" {
        match node.child_by_field_name("name") {
            Some(inner) => emit_cpp_type_node(&inner, bytes, file, ctx, out),
            None => {
                let text = node_text(node, bytes);
                let leaf = text.rsplit("::").next().unwrap_or(text);
                if !leaf.is_empty() {
                    push_type_ref(out, leaf, node, file, ctx);
                }
            }
        }
        return;
    }

    if kind != "template_type" {
        return;
    }

    // The template itself.
    let named = node
        .child_by_field_name("name")
        .and_then(|name_node| type_leaf_name(&name_node, bytes).map(|leaf| (name_node, leaf)));
    if let Some((name_node, leaf)) = named {
        push_type_ref(out, &leaf, &name_node, file, ctx);
    }

    // Its type arguments.
    let mut cursor = node.walk();
    let argument_lists = node
        .children(&mut cursor)
        .filter(|child| child.kind() == "template_argument_list");
    for list in argument_lists {
        for arg in list.children(&mut list.walk()) {
            let target = match arg.kind() {
                "type_descriptor" => arg.child_by_field_name("type"),
                "type_identifier" | "qualified_identifier" | "template_type" => Some(arg),
                _ => None,
            };
            if let Some(ty) = target {
                emit_cpp_type_node(&ty, bytes, file, TypeRefContext::GenericArg, out);
            }
        }
    }
}
/// Walks the tree and emits type references for the `type` field of
/// parameters (`ParameterType`), function definitions (`ReturnType`) and
/// field declarations (`Field`). All children are visited in every case.
fn collect_type_references(node: &Node, bytes: &[u8], file: &str, out: &mut Vec<Reference>) {
    let context = match node.kind() {
        "parameter_declaration" | "optional_parameter_declaration" => {
            Some(TypeRefContext::ParameterType)
        }
        "function_definition" => Some(TypeRefContext::ReturnType),
        "field_declaration" => Some(TypeRefContext::Field),
        _ => None,
    };

    if let Some(context) = context {
        if let Some(type_node) = node.child_by_field_name("type") {
            emit_cpp_type_node(&type_node, bytes, file, context, out);
        }
    }

    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        collect_type_references(&child, bytes, file, out);
    }
}
fn collect_inheritance(node: &Node, bytes: &[u8], file: &str, out: &mut Vec<Reference>) {
match node.kind() {
"class_specifier" | "struct_specifier" => {
if let Some(clause) = node
.children(&mut node.walk())
.find(|c| c.kind() == "base_class_clause")
{
for base in clause.children(&mut clause.walk()) {
match base.kind() {
"type_identifier" | "qualified_identifier" | "template_type" => {
super::push_ref(
out,
super::simple_type_name(node_text(&base, bytes), "::"),
&base,
file,
RefRole::IsImplementation,
);
}
_ => {}
}
}
}
}
_ => {}
}
for child in node.children(&mut node.walk()) {
collect_inheritance(&child, bytes, file, out);
}
}
#[cfg(test)]
mod tests {
use super::*;
fn by_name<'a>(facts: &'a FileFacts, n: &str) -> Option<&'a Symbol> {
facts.symbols.iter().find(|s| s.name == n)
}
#[test]
fn free_main_is_entry_point() {
let facts = CppExtractor
.extract("int main() { return 0; }", "src/main.cpp")
.unwrap();
let main = by_name(&facts, "main").unwrap();
assert!(
main.entry_points
.iter()
.any(|e| matches!(e, EntryPoint::Main))
);
}
#[test]
fn method_main_is_not_entry_point() {
let facts = CppExtractor
.extract("struct S { int main(); };", "src/s.cpp")
.unwrap();
let main = by_name(&facts, "main").unwrap();
assert!(
!main
.entry_points
.iter()
.any(|e| matches!(e, EntryPoint::Main))
);
}
#[test]
fn free_function_in_namespace() {
let src = r#"
namespace io {
int connect(const char *host) { return 0; }
}
"#;
let facts = CppExtractor.extract(src, "src/net/sock.cpp").unwrap();
let f = by_name(&facts, "connect").unwrap();
assert_eq!(f.kind, SymbolKind::Function);
assert_eq!(
f.id.to_scip_string(),
"codegraph . . . net/sock/io/connect()."
);
assert_eq!(facts.lang, "cpp");
}
#[test]
fn class_visibility() {
let src = r#"
namespace io {
class Sock {
public:
void open();
private:
void shutdown();
};
}
"#;
let facts = CppExtractor.extract(src, "src/net/sock.cpp").unwrap();
let sock = by_name(&facts, "Sock").unwrap();
assert_eq!(sock.kind, SymbolKind::Class);
assert_eq!(
sock.id.to_scip_string(),
"codegraph . . . net/sock/io/Sock#"
);
let open = by_name(&facts, "open").unwrap();
assert_eq!(open.kind, SymbolKind::Method);
assert_eq!(open.visibility, Visibility::Public);
assert_eq!(
open.id.to_scip_string(),
"codegraph . . . net/sock/io/Sock#open()."
);
let shutdown = by_name(&facts, "shutdown").unwrap();
assert_eq!(shutdown.kind, SymbolKind::Method);
assert_eq!(shutdown.visibility, Visibility::Private);
assert_eq!(
shutdown.id.to_scip_string(),
"codegraph . . . net/sock/io/Sock#shutdown()."
);
}
#[test]
fn struct_field_default_public() {
let src = r#"
struct Point {
int x;
int y;
};
"#;
let facts = CppExtractor.extract(src, "src/geo.cpp").unwrap();
let point = by_name(&facts, "Point").unwrap();
assert_eq!(point.kind, SymbolKind::Struct);
assert_eq!(point.id.to_scip_string(), "codegraph . . . geo/Point#");
let x = by_name(&facts, "x").unwrap();
assert_eq!(x.kind, SymbolKind::Static);
assert_eq!(x.visibility, Visibility::Public);
assert_eq!(x.id.to_scip_string(), "codegraph . . . geo/Point#x.");
let y = by_name(&facts, "y").unwrap();
assert_eq!(y.visibility, Visibility::Public);
}
#[test]
fn enum_and_alias() {
let src = r#"
enum Color { Red, Green };
using Id = int;
typedef int Handle;
"#;
let facts = CppExtractor.extract(src, "src/types.cpp").unwrap();
let color = by_name(&facts, "Color").unwrap();
assert_eq!(color.kind, SymbolKind::Enum);
assert_eq!(color.id.to_scip_string(), "codegraph . . . types/Color#");
let id = by_name(&facts, "Id").unwrap();
assert_eq!(id.kind, SymbolKind::TypeAlias);
assert_eq!(id.id.to_scip_string(), "codegraph . . . types/Id#");
let handle = by_name(&facts, "Handle").unwrap();
assert_eq!(handle.kind, SymbolKind::TypeAlias);
assert_eq!(handle.id.to_scip_string(), "codegraph . . . types/Handle#");
}
#[test]
fn define_macro() {
let src = r#"
#define MAX_CONN 64
"#;
let facts = CppExtractor.extract(src, "src/conf.hpp").unwrap();
let m = by_name(&facts, "MAX_CONN").unwrap();
assert_eq!(m.kind, SymbolKind::Const);
assert_eq!(m.id.to_scip_string(), "codegraph . . . conf/MAX_CONN!");
}
#[test]
fn extracts_call_references() {
let src = r#"
void run() {
connect("host");
obj.handle();
}
"#;
let facts = CppExtractor.extract(src, "src/main.cpp").unwrap();
let names: Vec<&str> = facts.references.iter().map(|r| r.name.as_str()).collect();
assert!(
names.contains(&"connect"),
"expected 'connect' in {names:?}"
);
assert!(names.contains(&"handle"), "expected 'handle' in {names:?}");
}
#[test]
fn inherit_single_public_base() {
let src = "class Derived : public Base {};";
let facts = CppExtractor.extract(src, "src/foo.cpp").unwrap();
let inherit: Vec<&str> = facts
.references
.iter()
.filter(|r| r.role == RefRole::IsImplementation)
.map(|r| r.name.as_str())
.collect();
assert_eq!(inherit, vec!["Base"], "expected [Base], got {inherit:?}");
}
#[test]
fn inherit_struct_multiple_bases() {
let src = "struct S : A, B {};";
let facts = CppExtractor.extract(src, "src/foo.cpp").unwrap();
let mut inherit: Vec<&str> = facts
.references
.iter()
.filter(|r| r.role == RefRole::IsImplementation)
.map(|r| r.name.as_str())
.collect();
inherit.sort_unstable();
assert_eq!(inherit, vec!["A", "B"], "expected [A, B], got {inherit:?}");
}
#[test]
fn inherit_qualified_base_strips_namespace() {
let src = "class X : public ns::Base {};";
let facts = CppExtractor.extract(src, "src/foo.cpp").unwrap();
let inherit: Vec<&str> = facts
.references
.iter()
.filter(|r| r.role == RefRole::IsImplementation)
.map(|r| r.name.as_str())
.collect();
assert_eq!(inherit, vec!["Base"], "expected [Base], got {inherit:?}");
}
#[test]
fn params_emit_param_bindings() {
let src = "void add(int a, int b){}\n";
let facts = CppExtractor.extract(src, "src/math.cpp").unwrap();
let fn_scope_id = facts
.scopes
.iter()
.position(|s| s.kind == ScopeKind::Function)
.expect("expected a Function scope");
let mut param_names: Vec<(&str, ScopeId)> = facts
.bindings
.iter()
.filter(|b| b.kind == BindingKind::Param)
.map(|b| (b.name.as_str(), b.scope))
.collect();
param_names.sort_by_key(|(n, _)| *n);
assert_eq!(
param_names,
vec![("a", fn_scope_id), ("b", fn_scope_id)],
"expected Param bindings for a and b, got {param_names:?}"
);
}
#[test]
fn reference_param_emits_param_binding() {
let src = "void inc(int& r){}\n";
let facts = CppExtractor.extract(src, "src/inc.cpp").unwrap();
let r = facts
.bindings
.iter()
.find(|b| b.kind == BindingKind::Param && b.name == "r")
.expect("expected a Param binding for 'r'");
assert_eq!(
facts.scopes[r.scope].kind,
ScopeKind::Function,
"reference param 'r' should be in a Function scope"
);
}
#[test]
fn optional_param_emits_param_binding() {
let src = "void f(int x, int y = 0){}\n";
let facts = CppExtractor.extract(src, "src/f.cpp").unwrap();
let mut param_names: Vec<&str> = facts
.bindings
.iter()
.filter(|b| b.kind == BindingKind::Param)
.map(|b| b.name.as_str())
.collect();
param_names.sort_unstable();
assert_eq!(
param_names,
vec!["x", "y"],
"expected Param bindings for x and y, got {param_names:?}"
);
}
#[test]
fn pointer_return_function_params_collected() {
let src = "char* dup(const char* s){return 0;}\n";
let facts = CppExtractor.extract(src, "src/dup.cpp").unwrap();
let s = facts
.bindings
.iter()
.find(|b| b.kind == BindingKind::Param && b.name == "s")
.expect("pointer-return function's param 's' should be collected");
assert_eq!(
facts.scopes[s.scope].kind,
ScopeKind::Function,
"param 's' should be in a Function scope"
);
}
#[test]
fn local_var_emits_local_binding() {
let src = "void f(){ int x = 0; }\n";
let facts = CppExtractor.extract(src, "src/f.cpp").unwrap();
let x = facts
.bindings
.iter()
.find(|b| b.kind == BindingKind::Local && b.name == "x")
.expect("expected a Local binding for 'x'");
assert_ne!(x.scope, 0, "local 'x' must NOT be in scope 0 (file root)");
assert!(
matches!(
facts.scopes[x.scope].kind,
ScopeKind::Function | ScopeKind::Block
),
"local 'x' scope must be Function or Block, got {:?}",
facts.scopes[x.scope].kind
);
}
#[test]
fn range_for_emits_local_binding() {
let src = "void f(){ int a[3]={}; for (int v : a){} }\n";
let facts = CppExtractor.extract(src, "src/f.cpp").unwrap();
let v = facts
.bindings
.iter()
.find(|b| b.kind == BindingKind::Local && b.name == "v")
.expect("expected a Local binding for 'v'");
assert_ne!(v.scope, 0, "range-for 'v' must NOT be in scope 0");
}
#[test]
fn namespace_global_not_a_local() {
let src = "namespace ns { int g; void f(){} }\n";
let facts = CppExtractor.extract(src, "src/ns.cpp").unwrap();
assert!(
!facts
.bindings
.iter()
.any(|b| b.kind == BindingKind::Local && b.name == "g"),
"namespace global 'g' must NOT be a Local binding"
);
assert!(
facts
.bindings
.iter()
.any(|b| b.kind == BindingKind::Definition && b.name == "g"),
"namespace global 'g' must have a Definition binding"
);
}
#[test]
fn class_fields_not_locals() {
let src = "struct P { int x; int y; };\n";
let facts = CppExtractor.extract(src, "src/p.cpp").unwrap();
assert!(
!facts
.bindings
.iter()
.any(|b| b.kind == BindingKind::Local && (b.name == "x" || b.name == "y")),
"class fields must NOT be Local bindings"
);
assert!(
facts
.bindings
.iter()
.any(|b| b.kind == BindingKind::Definition && b.name == "x"),
"struct field 'x' must have a Definition binding"
);
assert!(
facts
.bindings
.iter()
.any(|b| b.kind == BindingKind::Definition && b.name == "y"),
"struct field 'y' must have a Definition binding"
);
}
#[test]
fn nesting_produces_type_and_function_scopes() {
let src = "struct S { void m(){ int v=0; } };\n";
let facts = CppExtractor.extract(src, "src/s.cpp").unwrap();
let has_type = facts.scopes.iter().any(|s| s.kind == ScopeKind::Type);
let has_fn = facts.scopes.iter().any(|s| s.kind == ScopeKind::Function);
assert!(has_type, "expected a Type scope for struct body");
assert!(has_fn, "expected a Function scope for method body");
let v = facts
.bindings
.iter()
.find(|b| b.kind == BindingKind::Local && b.name == "v")
.expect("expected a Local binding for 'v'");
assert!(
matches!(
facts.scopes[v.scope].kind,
ScopeKind::Function | ScopeKind::Block
),
"local 'v' must be in a Function or Block scope"
);
}
#[test]
fn namespace_body_opens_type_scope() {
let src = "namespace io { void f(){} }\n";
let facts = CppExtractor.extract(src, "src/io.cpp").unwrap();
assert!(
facts.scopes.len() >= 3,
"expected at least 3 scopes (Module + Type for namespace + Function), got {}",
facts.scopes.len()
);
assert!(
facts.scopes.iter().any(|s| s.kind == ScopeKind::Type),
"expected a Type scope for namespace body"
);
}
#[test]
fn lambda_params_emit_param_bindings() {
let src = "void f(){ auto fn = [](int x, int y){}; }\n";
let facts = CppExtractor.extract(src, "src/f.cpp").unwrap();
let mut param_names: Vec<&str> = facts
.bindings
.iter()
.filter(|b| b.kind == BindingKind::Param)
.map(|b| b.name.as_str())
.collect();
param_names.sort_unstable();
assert_eq!(
param_names,
vec!["x", "y"],
"expected Param bindings for lambda params x and y, got {param_names:?}"
);
}
#[test]
fn same_file_call_ref_has_non_zero_scope_and_callee_has_definition() {
let src = "int helper(){return 0;}\nint caller(){return helper();}\n";
let facts = CppExtractor.extract(src, "src/pair.cpp").unwrap();
assert!(
facts
.bindings
.iter()
.any(|b| b.kind == BindingKind::Definition && b.name == "helper"),
"expected a Definition binding for 'helper'"
);
let call_ref = facts
.references
.iter()
.find(|r| r.role == RefRole::Call && r.name == "helper")
.expect("expected a Call ref for 'helper'");
assert!(
call_ref.scope.is_some() && call_ref.scope != Some(0),
"helper() call ref must be in a non-zero scope, got {:?}",
call_ref.scope
);
}
#[test]
fn out_of_line_method_param_collected() {
let src = "class Foo {};\nvoid Foo::bar(int a){}\n";
let facts = CppExtractor.extract(src, "src/foo.cpp").unwrap();
let a = facts
.bindings
.iter()
.find(|b| b.kind == BindingKind::Param && b.name == "a")
.expect("expected a Param binding for 'a' in out-of-line method");
assert_eq!(
facts.scopes[a.scope].kind,
ScopeKind::Function,
"param 'a' should be in a Function scope"
);
}
#[test]
fn read_ref_at_use_not_at_decl() {
let src = "int f() { int base = 1; return base; }\n";
let facts = CppExtractor.extract(src, "src/f.cpp").unwrap();
let read_refs: Vec<_> = facts
.references
.iter()
.filter(|r| r.role == RefRole::Read && r.name == "base")
.collect();
assert!(
!read_refs.is_empty(),
"expected at least one Read ref for 'base', got none"
);
let use_ref = read_refs
.iter()
.find(|r| r.occ.byte > 20)
.expect("expected Read ref for 'base' in the return statement (byte > 20)");
assert!(
use_ref.occ.byte > 20,
"Read ref should be at the use site, not the declaration"
);
}
#[test]
fn write_ref_emitted_for_assignment() {
let src = "void f() { int cnt = 0; cnt = 5; }\n";
let facts = CppExtractor.extract(src, "src/f.cpp").unwrap();
let write_refs: Vec<_> = facts
.references
.iter()
.filter(|r| r.role == RefRole::Write && r.name == "cnt")
.collect();
assert!(
!write_refs.is_empty(),
"expected at least one Write ref for 'cnt', got none — all refs: {:?}",
facts
.references
.iter()
.map(|r| (&r.name, r.role))
.collect::<Vec<_>>()
);
}
/// A call site (`helper();`) must be reported as a `Call` reference only; the
/// callee identifier must not additionally show up as a `Read`.
#[test]
fn call_not_also_read() {
    let facts = CppExtractor
        .extract("void f() { helper(); }\n", "src/f.cpp")
        .unwrap();

    // First make sure the call itself was seen at all.
    let has_call = facts
        .references
        .iter()
        .any(|reference| reference.name == "helper" && reference.role == RefRole::Call);
    assert!(has_call, "expected a Call ref for 'helper'");

    // Then collect any Read refs on the same name; there must be none.
    let read_refs: Vec<_> = facts
        .references
        .iter()
        .filter(|reference| reference.name == "helper" && reference.role == RefRole::Read)
        .collect();
    assert!(
        read_refs.is_empty(),
        "helper() must NOT produce a Read ref; got: {read_refs:?}"
    );
}
/// In `ptr->field`, only the object expression `ptr` counts as a `Read`; the
/// member name `field` must not produce a `Read` reference.
#[test]
fn field_access_reads_ptr_not_field() {
    let facts = CppExtractor
        .extract(
            "struct S { int field; };\nint f(S* ptr) { return ptr->field; }\n",
            "src/f.cpp",
        )
        .unwrap();

    let ptr_was_read = facts
        .references
        .iter()
        .any(|reference| reference.name == "ptr" && reference.role == RefRole::Read);
    assert!(ptr_was_read, "expected a Read ref for 'ptr', got none");

    // Keep the offending refs around so the failure message can print them.
    let field_reads: Vec<_> = facts
        .references
        .iter()
        .filter(|reference| reference.name == "field" && reference.role == RefRole::Read)
        .collect();
    assert!(
        field_reads.is_empty(),
        "field_identifier 'field' must NOT be a Read ref; got: {field_reads:?}"
    );
}
/// A user-defined type in parameter position (`void f(Config c)`) must be
/// reported as a `TypeRef` carrying the `ParameterType` context.
#[test]
fn type_ref_param_type() {
    let facts = CppExtractor
        .extract("void f(Config c) {}\n", "src/f.cpp")
        .unwrap();
    let config_ref = facts
        .references
        .iter()
        .find(|candidate| candidate.name == "Config" && candidate.role == RefRole::TypeRef)
        .expect("expected TypeRef ref for 'Config'");
    assert_eq!(
        config_ref.type_ref_ctx,
        Some(TypeRefContext::ParameterType),
        "expected ParameterType ctx, got {:?}",
        config_ref.type_ref_ctx
    );
}
/// The first `TypeRef` found for `Config` in `Config make() { return Config(); }`
/// must carry the `ReturnType` context.
#[test]
fn type_ref_return_type() {
    let facts = CppExtractor
        .extract("Config make() { return Config(); }\n", "src/f.cpp")
        .unwrap();
    let first_config_ref = facts
        .references
        .iter()
        .find(|candidate| candidate.name == "Config" && candidate.role == RefRole::TypeRef)
        .expect("expected TypeRef ref for 'Config'");
    assert_eq!(
        first_config_ref.type_ref_ctx,
        Some(TypeRefContext::ReturnType),
        "expected ReturnType ctx, got {:?}",
        first_config_ref.type_ref_ctx
    );
}
/// A user-defined type used as a data member's type (`Config conf;`) must be
/// reported as a `TypeRef` carrying the `Field` context.
#[test]
fn type_ref_field_type() {
    let facts = CppExtractor
        .extract("struct T { Config conf; };\n", "src/f.cpp")
        .unwrap();
    let member_type_ref = facts
        .references
        .iter()
        .find(|candidate| candidate.name == "Config" && candidate.role == RefRole::TypeRef)
        .expect("expected TypeRef ref for 'Config'");
    assert_eq!(
        member_type_ref.type_ref_ctx,
        Some(TypeRefContext::Field),
        "expected Field ctx, got {:?}",
        member_type_ref.type_ref_ctx
    );
}
/// For `std::vector<Config>` in parameter position, the outer template name
/// `vector` must be a `ParameterType` TypeRef and the template argument
/// `Config` must be a `GenericArg` TypeRef.
#[test]
fn type_ref_template_arg() {
    let facts = CppExtractor
        .extract("void f(std::vector<Config> xs) {}\n", "src/f.cpp")
        .unwrap();
    let type_refs: Vec<_> = facts
        .references
        .iter()
        .filter(|reference| reference.role == RefRole::TypeRef)
        .collect();

    // Outer template: a missing ref fails with the list of names that were found.
    let vector_ref = type_refs
        .iter()
        .find(|reference| reference.name == "vector")
        .unwrap_or_else(|| {
            panic!(
                "expected TypeRef 'vector', got: {:?}",
                type_refs.iter().map(|r| &r.name).collect::<Vec<_>>()
            )
        });
    assert_eq!(
        vector_ref.type_ref_ctx,
        Some(TypeRefContext::ParameterType),
        "expected ParameterType ctx for 'vector'"
    );

    // Template argument: same lookup-or-fail shape, different expected context.
    let config_ref = type_refs
        .iter()
        .find(|reference| reference.name == "Config")
        .unwrap_or_else(|| {
            panic!(
                "expected TypeRef 'Config', got: {:?}",
                type_refs.iter().map(|r| &r.name).collect::<Vec<_>>()
            )
        });
    assert_eq!(
        config_ref.type_ref_ctx,
        Some(TypeRefContext::GenericArg),
        "expected GenericArg ctx for 'Config'"
    );
}
/// Built-in types such as `int` must not be reported as `TypeRef` references.
#[test]
fn type_ref_primitive_skipped() {
    let facts = CppExtractor
        .extract("void f(int n) {}\n", "src/f.cpp")
        .unwrap();
    // Collected (rather than just tested) so a failure can print the offenders.
    let int_typerefs: Vec<_> = facts
        .references
        .iter()
        .filter(|reference| reference.name == "int" && reference.role == RefRole::TypeRef)
        .collect();
    assert!(
        int_typerefs.is_empty(),
        "primitive 'int' must NOT produce a TypeRef, got: {int_typerefs:?}"
    );
}
/// A member declared after `public:` in a class must be tagged `Public`.
#[test]
fn class_public_member_tagged_public() {
    let facts = CppExtractor
        .extract("class Foo { public: void go(); };\n", "src/foo.cpp")
        .unwrap();
    let member = by_name(&facts, "go").unwrap();
    assert_eq!(
        member.visibility,
        Visibility::Public,
        "public: member must be Public"
    );
}
/// A member declared after `private:` must be emitted as a symbol and tagged
/// `Private`.
#[test]
fn class_private_member_emitted_and_tagged_private() {
    let facts = CppExtractor
        .extract("class Foo { private: void secret(); };\n", "src/foo.cpp")
        .unwrap();
    let member = by_name(&facts, "secret").expect("private member 'secret' must now emit");
    assert_eq!(
        member.visibility,
        Visibility::Private,
        "private: member must be Private"
    );
}
/// A member declared after `protected:` must be emitted as a symbol and tagged
/// `Protected`.
#[test]
fn class_protected_member_emitted_and_tagged_protected() {
    let facts = CppExtractor
        .extract("class Foo { protected: void hook(); };\n", "src/foo.cpp")
        .unwrap();
    let member = by_name(&facts, "hook").expect("protected member 'hook' must emit");
    assert_eq!(
        member.visibility,
        Visibility::Protected,
        "protected: member must be Protected"
    );
}
/// With no access specifier, a `class` member must be emitted and tagged
/// `Private`.
#[test]
fn class_default_member_is_private() {
    let facts = CppExtractor
        .extract("class Foo { void hidden(); };\n", "src/foo.cpp")
        .unwrap();
    let member = by_name(&facts, "hidden").expect("class member with no specifier must emit");
    assert_eq!(
        member.visibility,
        Visibility::Private,
        "class default must be Private"
    );
}
/// With no access specifier, a `struct` member must be emitted and tagged
/// `Public`.
#[test]
fn struct_default_member_is_public() {
    let facts = CppExtractor
        .extract("struct Bar { int val; };\n", "src/bar.cpp")
        .unwrap();
    let member = by_name(&facts, "val").expect("struct member with no specifier must emit");
    assert_eq!(
        member.visibility,
        Visibility::Public,
        "struct default must be Public"
    );
}
/// A file-level function declared `static` must be emitted and tagged
/// `Private`.
#[test]
fn static_file_level_fn_is_private() {
    let facts = CppExtractor
        .extract("static void impl(){}\n", "src/util.cpp")
        .unwrap();
    let static_fn = by_name(&facts, "impl").expect("static file-level fn must emit");
    assert_eq!(
        static_fn.visibility,
        Visibility::Private,
        "static (internal-linkage) fn must be Private"
    );
}
/// A file-level function without `static` must be tagged `Public`.
#[test]
fn non_static_file_level_fn_is_public() {
    let facts = CppExtractor
        .extract("void pub_fn(){}\n", "src/util.cpp")
        .unwrap();
    let free_fn = by_name(&facts, "pub_fn").unwrap();
    assert_eq!(
        free_fn.visibility,
        Visibility::Public,
        "non-static fn must be Public"
    );
}
/// A class that switches between `public:`, `protected:` and `private:`
/// sections must emit every member, each tagged with the visibility of the
/// section it was declared in.
#[test]
fn mixed_access_specifiers_all_members_emit() {
    let src = r#"
class Widget {
public:
void show();
protected:
void draw();
private:
void impl();
};
"#;
    let facts = CppExtractor.extract(src, "src/widget.cpp").unwrap();
    // (member name, message if the symbol is missing, expected visibility),
    // checked in declaration order.
    let expectations = [
        ("show", "show must emit", Visibility::Public),
        ("draw", "draw must emit", Visibility::Protected),
        ("impl", "impl must emit", Visibility::Private),
    ];
    for (member, missing_msg, expected) in expectations {
        let symbol = by_name(&facts, member).expect(missing_msg);
        assert_eq!(symbol.visibility, expected);
    }
}
}