use std::collections::{HashMap, HashSet};
use std::fmt;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use futures::stream::{FuturesUnordered, StreamExt};
use neo4rs::{query, Graph};
use thiserror::Error;
use tokio::sync::Mutex;
use tree_sitter::{Node, Tree};
use crate::compress::{compress_full_source, compress_snippet, CompressorClient};
use crate::go_resolve::{
discover_go_modules, discover_go_replaces, is_likely_third_party_go_import,
resolve_go_import_to_known_go_file, GoModule, GoReplace,
};
use crate::python_common_external::is_python_common_external_top_level;
use crate::go_stdlib::is_go_stdlib_import;
use crate::ir::{
api_endpoint_key, external_api_key, module_key, ClassIr, EdgeIr,
EdgeKind, FunctionIr, ProjectIr, PropertyIr,
};
use crate::python_stdlib::is_python_stdlib_top_level;
use crate::schema::props;
use crate::scanner::ParsedFile;
use crate::LanguageId;
/// Connection settings for the Neo4j database.
///
/// The three fields are passed, in this order, to `Graph::new` whenever this
/// module opens a connection.
#[derive(Debug, Clone)]
pub struct Neo4jConfig {
/// Server URI handed to `Graph::new`.
pub uri: String,
/// User name handed to `Graph::new`.
pub user: String,
/// Password handed to `Graph::new`.
pub password: String,
}
/// Tunable options that influence how parsed files are persisted.
#[derive(Debug, Clone)]
pub struct GraphPersistenceOptions {
/// NOTE(review): presumably forces unresolved imports to be logged even when
/// they are stdlib / third-party (see `should_log_unresolved_import`) —
/// confirm at the call sites, which are not visible here.
pub verbose_imports: bool,
/// Maximum number of parse warnings printed per file; `0` means "no limit"
/// (see `emit_limited_parse_warnings`).
pub max_parse_warnings_per_file: usize,
/// Compressor settings; a `CompressorClient` is only created when
/// `compressor.enabled` is true.
pub compressor: CompressorConfig,
}
impl Default for GraphPersistenceOptions {
fn default() -> Self {
Self {
verbose_imports: false,
max_parse_warnings_per_file: 50,
compressor: CompressorConfig::default(),
}
}
}
pub use crate::compress::{CompressorConfig, DEFAULT_COMPRESSOR_URL};
/// The kinds of graph node this enum can name.
///
/// The `Display` output is exactly the variant name (`"File"`, `"Class"`,
/// `"Function"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NodeKind {
    File,
    Class,
    Function,
}

impl fmt::Display for NodeKind {
    /// Writes the variant name verbatim; width / padding flags are ignored
    /// because the text goes straight through `write_str`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match *self {
            NodeKind::File => "File",
            NodeKind::Class => "Class",
            NodeKind::Function => "Function",
        };
        f.write_str(label)
    }
}
/// A class-like declaration extracted from a source file.
#[derive(Debug, Clone)]
struct ClassSymbol {
/// Simple (unqualified) name; stored as the `name` of the `Class` node.
name: String,
/// Fully qualified name; used as the merge key of the `Class` node.
fqn: String,
/// Optional kind label. The C# persister falls back to `"class"` when this is `None`.
kind: Option<&'static str>,
}
/// A property declaration extracted from a source file.
#[derive(Debug, Clone)]
struct PropertySymbol {
/// Fully qualified name of the class that declares the property.
class_fqn: String,
/// Simple name of the property.
name: String,
/// Fully qualified name; used as the merge key of the `Property` node.
fqn: String,
/// Declared type text, when one was extracted.
declared_type: Option<String>,
}
/// A function or method declaration extracted from a source file.
#[derive(Debug, Clone)]
struct FunctionSymbol {
/// Simple name; also compared against Spring handler names.
name: String,
/// Fully qualified name; used as the merge key of the `Function` node.
fqn: String,
/// Owning class, if any. `None` means the function is persisted without a
/// class-to-function `DECLARES_FUNCTION` edge.
class_fqn: Option<String>,
/// Return type text, when one was extracted.
return_type: Option<String>,
/// Parameter type texts, stored as `param_types` on the node.
param_types: Vec<String>,
/// Number of parameters, stored as `param_count` on the node.
param_count: usize,
/// Modifier keywords, stored as `modifiers` by the C# persister.
modifiers: Vec<String>,
/// NOTE(review): presumably only meaningful for Go methods (pointer vs. value
/// receiver) — no use is visible in this part of the file; confirm.
is_pointer_receiver: Option<bool>,
}
/// One callback requirement, identified by callback name and arity.
///
/// NOTE(review): produced by `collect_callback_contracts_for_module`, which is
/// not visible here; the field notes below are partly inferred from names.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct CallbackContract {
/// Presumably the behaviour module that declares the callback — confirm.
behaviour: String,
/// Callback function name.
name: String,
/// Callback arity.
arity: u32,
/// Presumably true when the callback is optional — confirm.
optional: bool,
}
/// Per-module summary stored in the Erlang module index
/// (see `build_erlang_module_index`).
#[derive(Debug, Default, Clone)]
struct ErlangModuleSnapshot {
/// `(name, arity)` of every function extracted from the module.
implemented_signatures: HashSet<(String, u32)>,
/// `(name, arity)` of every callback contract collected for the module.
callback_signatures: HashSet<(String, u32)>,
}
/// Behaviour-related facts extracted from one Erlang syntax tree.
///
/// `behaviour_usages`, `declared_callbacks` and `optional_callbacks` are the
/// inputs handed to `collect_callback_contracts_for_module`.
#[derive(Debug, Default)]
struct ErlangBehaviourMetadata {
/// Names of behaviours referenced by the module.
behaviour_usages: HashSet<String>,
/// `(name, arity)` pairs of callbacks declared by the module.
declared_callbacks: Vec<(String, u32)>,
/// `(name, arity)` pairs of callbacks marked optional.
optional_callbacks: HashSet<(String, u32)>,
/// NOTE(review): no use is visible in this part of the file — presumably
/// behaviours this module extends; confirm.
behaviour_extensions: HashSet<String>,
/// NOTE(review): no use is visible in this part of the file — presumably
/// callbacks this module overrides; confirm.
overridden_callbacks: HashSet<(String, u32)>,
}
/// Error type returned by the graph persistence functions in this file.
#[derive(Debug, Error)]
pub enum GraphError {
/// Any error reported by the `neo4rs` driver; `#[from]` lets `?` convert it
/// automatically.
#[error("neo4j connection error: {0}")]
Connection(#[from] neo4rs::Error),
}
/// Size at which any single relationship set of a `BatchAccumulator` makes
/// `should_flush` return true.
const BATCH_FLUSH_THRESHOLD: usize = 3000;
/// Chunk size used when C# classes, properties and methods are written with
/// batched `UNWIND` queries.
const CSHARP_NODE_BATCH_FLUSH_THRESHOLD: usize = 500;
/// Buffers relationships so they can be written to Neo4j in bulk.
///
/// Each set de-duplicates its entries; `flush` writes and then clears them.
#[derive(Debug, Default)]
struct BatchAccumulator {
/// `(caller_fqn, callee_fqn)` pairs written as `CALLS_FUNCTION`.
calls_function: HashSet<(String, String)>,
/// `(function_fqn, class_fqn)` pairs written as `USES_CLASS` (Function -> Class).
uses_class: HashSet<(String, String)>,
/// `(derived_fqn, base_fqn)` pairs written as `USES_CLASS` (Class -> Class).
class_uses_class: HashSet<(String, String)>,
/// `(function_fqn, base_url, norm_path)` triples written as `CALLS_EXTERNAL_API`.
calls_external_api: HashSet<(String, String, String)>,
}
/// A `BatchAccumulator` behind `Arc<tokio::sync::Mutex<_>>`, so that several
/// pending futures can add to the same accumulator.
type SharedBatchAccumulator = Arc<Mutex<BatchAccumulator>>;
impl BatchAccumulator {
fn new() -> Self {
Self::default()
}
fn add_calls_function(&mut self, caller_fqn: String, callee_fqn: String) {
self.calls_function.insert((caller_fqn, callee_fqn));
}
fn add_uses_class(&mut self, fn_fqn: String, class_fqn: String) {
self.uses_class.insert((fn_fqn, class_fqn));
}
fn add_class_uses_class(&mut self, derived_fqn: String, base_fqn: String) {
self.class_uses_class.insert((derived_fqn, base_fqn));
}
fn add_calls_external_api(&mut self, fn_fqn: String, base_url: String, norm_path: String) {
self.calls_external_api.insert((fn_fqn, base_url, norm_path));
}
fn should_flush(&self) -> bool {
self.calls_function.len() >= BATCH_FLUSH_THRESHOLD
|| self.uses_class.len() >= BATCH_FLUSH_THRESHOLD
|| self.class_uses_class.len() >= BATCH_FLUSH_THRESHOLD
|| self.calls_external_api.len() >= BATCH_FLUSH_THRESHOLD
}
fn total_size(&self) -> usize {
self.calls_function.len()
+ self.uses_class.len()
+ self.class_uses_class.len()
+ self.calls_external_api.len()
}
async fn flush(&mut self, graph: &Graph) -> Result<(), GraphError> {
let total = self.total_size();
if total == 0 {
return Ok(());
}
println!("Neo4j: flushing {} accumulated relationships...", total);
if !self.calls_function.is_empty() {
let caller_fqns: Vec<String> = self.calls_function.iter().map(|(c, _)| c.clone()).collect();
let callee_fqns: Vec<String> = self.calls_function.iter().map(|(_, c)| c.clone()).collect();
let batch_query = query(
"
UNWIND range(0, size($caller_fqns) - 1) AS i
WITH $caller_fqns[i] AS caller_fqn, $callee_fqns[i] AS callee_fqn
MERGE (caller:Function { fqn: caller_fqn })
MERGE (callee:Function { fqn: callee_fqn })
MERGE (caller)-[:CALLS_FUNCTION]->(callee)
",
)
.param("caller_fqns", caller_fqns)
.param("callee_fqns", callee_fqns);
graph.run(batch_query).await?;
self.calls_function.clear();
}
if !self.uses_class.is_empty() {
let fn_fqns: Vec<String> = self.uses_class.iter().map(|(f, _)| f.clone()).collect();
let cls_fqns: Vec<String> = self.uses_class.iter().map(|(_, c)| c.clone()).collect();
let batch_query = query(
"
UNWIND range(0, size($fn_fqns) - 1) AS i
WITH $fn_fqns[i] AS fn_fqn, $cls_fqns[i] AS cls_fqn
MERGE (fn:Function { fqn: fn_fqn })
MERGE (cls:Class { fqn: cls_fqn })
MERGE (fn)-[:USES_CLASS]->(cls)
",
)
.param("fn_fqns", fn_fqns)
.param("cls_fqns", cls_fqns);
graph.run(batch_query).await?;
self.uses_class.clear();
}
if !self.class_uses_class.is_empty() {
let derived: Vec<String> = self.class_uses_class.iter().map(|(d, _)| d.clone()).collect();
let bases: Vec<String> = self.class_uses_class.iter().map(|(_, b)| b.clone()).collect();
let batch_query = query(
"
UNWIND range(0, size($derived_fqns) - 1) AS i
WITH $derived_fqns[i] AS derived_fqn, $base_fqns[i] AS base_fqn
MERGE (d:Class { fqn: derived_fqn })
MERGE (b:Class { fqn: base_fqn })
MERGE (d)-[:USES_CLASS]->(b)
",
)
.param("derived_fqns", derived)
.param("base_fqns", bases);
graph.run(batch_query).await?;
self.class_uses_class.clear();
}
if !self.calls_external_api.is_empty() {
let fn_fqns: Vec<String> = self.calls_external_api.iter().map(|(f, _, _)| f.clone()).collect();
let base_urls: Vec<String> = self.calls_external_api.iter().map(|(_, b, _)| b.clone()).collect();
let norm_paths: Vec<String> = self.calls_external_api.iter().map(|(_, _, n)| n.clone()).collect();
let batch_query = query(
"
UNWIND range(0, size($fn_fqns) - 1) AS i
WITH $fn_fqns[i] AS fn_fqn, $base_urls[i] AS base_url, $norm_paths[i] AS norm_path
MERGE (fn:Function { fqn: fn_fqn })
MERGE (ext:ExternalApi { base_url: base_url, norm_path: norm_path })
MERGE (fn)-[:CALLS_EXTERNAL_API]->(ext)
",
)
.param("fn_fqns", fn_fqns)
.param("base_urls", base_urls)
.param("norm_paths", norm_paths);
graph.run(batch_query).await?;
self.calls_external_api.clear();
}
Ok(())
}
}
async fn flush_shared_accumulator_if_needed(
shared_accumulator: &SharedBatchAccumulator,
graph: &Graph,
) -> Result<(), GraphError> {
let mut local_batch = BatchAccumulator::new();
{
let mut guard = shared_accumulator.lock().await;
if !guard.should_flush() {
return Ok(());
}
std::mem::swap(&mut *guard, &mut local_batch);
}
local_batch.flush(graph).await
}
async fn flush_shared_accumulator_force(
shared_accumulator: &SharedBatchAccumulator,
graph: &Graph,
) -> Result<(), GraphError> {
let mut local_batch = BatchAccumulator::new();
{
let mut guard = shared_accumulator.lock().await;
if guard.total_size() == 0 {
return Ok(());
}
std::mem::swap(&mut *guard, &mut local_batch);
}
local_batch.flush(graph).await
}
/// Upper bound on the number of queued Erlang persistence futures; once the
/// queue reaches this size, one future is awaited before more are added.
const MAX_CONCURRENT_ERLANG_WRITES: usize = 8;
/// Expresses `file_path` relative to `root`, falling back to `file_path`
/// itself when no relation can be established.
///
/// Attempts, in order:
/// 1. canonicalised file against canonicalised root (an empty result is rejected),
/// 2. the joined (root + file) path against `root` as given,
/// 3. `file_path` as given against `root` as given,
/// 4. `file_path` as given against the canonicalised root.
///
/// Canonicalisation failures (e.g. the path does not exist) are tolerated by
/// using the non-canonical path instead.
fn repo_relative_file_path(root: &Path, file_path: &Path) -> PathBuf {
    let joined = if file_path.is_absolute() {
        file_path.to_path_buf()
    } else {
        root.join(file_path)
    };
    let canonical_root = root.canonicalize().unwrap_or_else(|_| root.to_path_buf());
    let canonical_file = joined.canonicalize().unwrap_or_else(|_| joined.clone());

    match canonical_file.strip_prefix(&canonical_root) {
        Ok(rel) if !rel.as_os_str().is_empty() => return rel.to_path_buf(),
        _ => {}
    }

    joined
        .strip_prefix(root)
        .or_else(|_| file_path.strip_prefix(root))
        .or_else(|_| file_path.strip_prefix(&canonical_root))
        .map(Path::to_path_buf)
        .unwrap_or_else(|_| file_path.to_path_buf())
}
/// Returns the repo-relative form of `file_path` as a forward-slash string;
/// this is the form used for `path` properties in the graph.
fn neo4j_path_string(root: &Path, file_path: &Path) -> String {
    let relative = repo_relative_file_path(root, file_path);
    path_str_slash(relative.as_path())
}
/// Renders a path as text (lossily for non-UTF-8 data) with every backslash
/// turned into a forward slash.
fn path_str_slash(p: &Path) -> String {
    let text = p.to_string_lossy();
    text.split('\\').collect::<Vec<&str>>().join("/")
}
/// Derives a project name from the first ordinary component of the
/// repo-relative path of `file_path`.
///
/// Returns `None` when the relative path has no ordinary component, or when
/// that first component is not valid UTF-8.
pub(crate) fn derive_project_name(file_path: &Path, root: &Path) -> Option<String> {
    use std::path::Component;
    let relative = repo_relative_file_path(root, file_path);
    let first_normal = relative.components().find_map(|component| match component {
        Component::Normal(segment) => Some(segment),
        _ => None,
    })?;
    first_normal.to_str().map(str::to_owned)
}
/// Builds a per-module index of Erlang function and callback signatures.
///
/// Non-Erlang files, and Erlang files whose module name cannot be resolved,
/// are ignored. Files that resolve to the same module name are merged into a
/// single snapshot.
fn build_erlang_module_index(files: &[ParsedFile]) -> HashMap<String, ErlangModuleSnapshot> {
    let mut modules: HashMap<String, ErlangModuleSnapshot> = HashMap::new();

    for parsed in files.iter().filter(|f| f.language == LanguageId::Erlang) {
        let module_name =
            match resolve_erlang_module_name(&parsed.path, &parsed.tree, &parsed.source) {
                Some(name) => name,
                None => continue,
            };

        let functions = extract_erlang_functions(&module_name, &parsed.tree, &parsed.source);
        let behaviour_meta =
            extract_erlang_behaviour_metadata_from_tree(&parsed.tree, &parsed.source);
        let contracts = collect_callback_contracts_for_module(
            Some(module_name.as_str()),
            &behaviour_meta.behaviour_usages,
            &behaviour_meta.declared_callbacks,
            &behaviour_meta.optional_callbacks,
        );

        let entry = modules.entry(module_name).or_default();
        entry.implemented_signatures.extend(
            functions
                .into_iter()
                .map(|(name, arity, _fqn)| (name, arity)),
        );
        entry.callback_signatures.extend(
            contracts
                .into_iter()
                .map(|contract| (contract.name, contract.arity)),
        );
    }

    modules
}
/// Appends the classes and properties found in one C# file to `ir`.
///
/// For every class a `ClassIr` plus a File -> Class `DeclaresClass` edge is
/// added; for every property a `PropertyIr` plus a Class -> Property
/// `DeclaresProperty` edge. Methods returned by the extractor are ignored,
/// and `code_bytes` is always left as `None`.
pub fn append_csharp_structural_ir(
    ir: &mut ProjectIr,
    file_path: &str,
    project_name: Option<String>,
    tree: &Tree,
    source: &str,
) {
    let (class_symbols, _, property_symbols) = extract_csharp_symbols(tree, source);
    let lang_label = LanguageId::CSharp.to_string();

    for class in class_symbols {
        let class_fqn = class.fqn;
        ir.classes.push(ClassIr {
            fqn: class_fqn.clone(),
            name: class.name,
            path: file_path.to_owned(),
            language: lang_label.clone(),
            project_name: project_name.clone(),
            kind: class.kind.map(|k| k.to_string()),
            code_bytes: None,
        });
        ir.edges.push(EdgeIr {
            kind: EdgeKind::DeclaresClass,
            from_label: "File".to_owned(),
            from_key: file_path.to_owned(),
            to_label: "Class".to_owned(),
            to_key: class_fqn,
        });
    }

    for property in property_symbols {
        let property_fqn = property.fqn;
        let owner_fqn = property.class_fqn;
        ir.properties.push(PropertyIr {
            fqn: property_fqn.clone(),
            name: property.name,
            class_fqn: owner_fqn.clone(),
            path: file_path.to_owned(),
            language: lang_label.clone(),
            project_name: project_name.clone(),
            declared_type: property.declared_type,
            code_bytes: None,
        });
        ir.edges.push(EdgeIr {
            kind: EdgeKind::DeclaresProperty,
            from_label: "Class".to_owned(),
            from_key: owner_fqn,
            to_label: "Property".to_owned(),
            to_key: property_fqn,
        });
    }
}
/// Appends the classes found in one Java file to `ir`.
///
/// For every class a `ClassIr` plus a File -> Class `DeclaresClass` edge is
/// added. Methods returned by the extractor are ignored, and `code_bytes` is
/// always left as `None`.
pub fn append_java_class_ir(
    ir: &mut ProjectIr,
    file_path: &str,
    project_name: Option<String>,
    tree: &Tree,
    source: &str,
) {
    let java_package = extract_java_package(source);
    let (class_symbols, _) = extract_java_symbols(tree, source, java_package.as_deref());
    let lang_label = LanguageId::Java.to_string();

    for class in class_symbols {
        let class_fqn = class.fqn;
        ir.classes.push(ClassIr {
            fqn: class_fqn.clone(),
            name: class.name,
            path: file_path.to_owned(),
            language: lang_label.clone(),
            project_name: project_name.clone(),
            kind: class.kind.map(|k| k.to_string()),
            code_bytes: None,
        });
        ir.edges.push(EdgeIr {
            kind: EdgeKind::DeclaresClass,
            from_label: "File".to_owned(),
            from_key: file_path.to_owned(),
            to_label: "Class".to_owned(),
            to_key: class_fqn,
        });
    }
}
/// Deletes every graph node that was extracted from one of `cleanup_targets`.
///
/// Each target is normalised to the repo-relative, forward-slash form used for
/// `path` properties, duplicates are removed, and all nodes with a matching
/// `path` are detach-deleted for each of the labels `File`, `Module`, `Class`,
/// `Property` and `Function`.
///
/// `Property` is part of the list because the C# persister stores the owning
/// file in `p.path`; without it, properties of a re-indexed file would be left
/// behind as orphans once their `Class` node is deleted.
///
/// Does nothing (and opens no connection) when `cleanup_targets` is empty.
///
/// # Errors
/// Returns `GraphError` when connecting or any delete query fails. Deletes are
/// issued label by label, so an error may leave earlier labels already cleaned.
pub async fn cleanup_incremental_targets_in_neo4j(
    cfg: &Neo4jConfig,
    root: &Path,
    cleanup_targets: &[String],
) -> Result<(), GraphError> {
    // Labels whose nodes carry the `path` of the file they came from.
    // Fixed, trusted identifiers: safe to splice into the query text.
    const PATH_SCOPED_LABELS: [&str; 5] = ["File", "Module", "Class", "Property", "Function"];

    if cleanup_targets.is_empty() {
        println!("Neo4j cleanup: no cleanup targets, skipping.");
        return Ok(());
    }

    // De-duplicate after normalisation; a non-empty input always yields at
    // least one path, so no second emptiness check is needed.
    let normalized_paths: Vec<String> = cleanup_targets
        .iter()
        .map(|target| neo4j_path_string(root, Path::new(target)))
        .collect::<HashSet<_>>()
        .into_iter()
        .collect();

    println!(
        "Neo4j cleanup: deleting stale graph scope for {} path(s)...",
        normalized_paths.len()
    );

    let graph = Graph::new(&cfg.uri, &cfg.user, &cfg.password).await?;
    for label in PATH_SCOPED_LABELS.iter() {
        // OPTIONAL MATCH keeps the row when nothing matches; deleting null is a no-op.
        let statement = format!(
            "
UNWIND $paths AS path
OPTIONAL MATCH (n:{label} {{ path: path }})
DETACH DELETE n
",
            label = label
        );
        let delete_query = query(&statement).param("paths", normalized_paths.clone());
        graph.run(delete_query).await?;
    }

    println!("Neo4j cleanup: stale graph scope deleted.");
    Ok(())
}
/// Decides whether parse warnings should be printed for `file_path`.
///
/// Returns `false` when any *directory* component of the path is
/// `vendordocs` or `refund_issue` (case-insensitive, `\` treated as `/`),
/// and `true` otherwise.
///
/// Matching is done on whole path components rather than on the substring
/// `"/name/"`, so repo-relative paths that *start* with an excluded directory
/// (e.g. `vendordocs/a.java`) are suppressed too. The final component (the
/// file name) is never treated as a directory.
fn should_emit_parse_warnings_for_path(file_path: &str) -> bool {
    const SUPPRESSED_DIRS: [&str; 2] = ["vendordocs", "refund_issue"];

    let normalized = file_path.replace('\\', "/").to_lowercase();
    // Everything before the last '/' is the directory part; no '/' means the
    // path is a bare file name with no directories to check.
    let dirs = match normalized.rsplit_once('/') {
        Some((dirs, _file_name)) => dirs,
        None => return true,
    };
    !dirs
        .split('/')
        .any(|segment| SUPPRESSED_DIRS.contains(&segment))
}
/// Prints parse warnings for one file, capped at `max_per_file` lines.
///
/// * Nothing is printed for paths rejected by
///   `should_emit_parse_warnings_for_path`.
/// * `max_per_file == 0` disables the cap.
/// * When warnings are cut off, one trailing line reports how many were
///   suppressed.
///
/// Each warning is a `(line, column, snippet)` triple.
fn emit_limited_parse_warnings(
    label: &str,
    file_path: &str,
    warnings: Vec<(usize, usize, String)>,
    max_per_file: usize,
) {
    if !should_emit_parse_warnings_for_path(file_path) {
        return;
    }

    let total = warnings.len();
    let shown = match max_per_file {
        0 => total,
        cap => cap.min(total),
    };

    for (line, col, snippet) in warnings.into_iter().take(shown) {
        println!(
            "{label} parse warning {file_path}:{line}:{col} - {snippet}",
        );
    }

    let rest = total - shown;
    if rest > 0 {
        println!(
            "{label} parse warning {file_path}: ... {rest} more suppressed (set max_parse_warnings_per_file to 0 for unlimited)",
        );
    }
}
/// Decides whether an unresolved import deserves a log line.
///
/// Verbose mode logs everything; otherwise only imports that are neither
/// standard-library nor third-party are logged.
fn should_log_unresolved_import(
    verbose_imports: bool,
    is_stdlib: bool,
    is_third_party: bool,
) -> bool {
    if verbose_imports {
        return true;
    }
    let expected_to_be_external = is_stdlib || is_third_party;
    !expected_to_be_external
}
/// Writes every parsed file, and the symbols extracted from it, into Neo4j.
///
/// Steps, in order:
/// 1. Connect; when `clean` is true, delete every node and relationship first.
/// 2. Collect the repo-relative path of every input file into `known_paths`.
/// 3. Discover Go modules and `replace` directives (failures are logged and
///    treated as "none found"), build the C# batch index and the Erlang module
///    index, and create the compressor client when compression is enabled.
/// 4. For each file: MERGE its `File` node, then dispatch to the
///    language-specific persister. Java, C#, Go and all remaining languages
///    are awaited inline; Erlang files are queued as futures, with at most
///    `MAX_CONCURRENT_ERLANG_WRITES` queued at a time.
/// 5. Drain the remaining Erlang futures, flush both accumulators, and link
///    `ApiEndpoint` and `ExternalApi` nodes that share a `norm_path` with
///    `SAME_API`.
///
/// # Errors
/// Returns `GraphError` for the first failed connection or query; processing
/// stops at that point.
pub async fn persist_files_to_neo4j(
cfg: &Neo4jConfig,
root: &Path,
files: &[ParsedFile],
clean: bool,
follow_symlinks: bool,
persistence: &GraphPersistenceOptions,
) -> Result<(), GraphError> {
let graph = Graph::new(&cfg.uri, &cfg.user, &cfg.password).await?;
if clean {
println!("Neo4j: deleting all existing nodes and relationships...");
let delete_query = query("MATCH (n) DETACH DELETE n");
graph.run(delete_query).await?;
println!("Neo4j: database cleaned, starting fresh graph construction");
}
// Paths of all files in this run, in the same normalised form used for
// `File.path`.
let mut known_paths: HashSet<String> = HashSet::new();
for file in files {
known_paths.insert(neo4j_path_string(root, &file.path));
}
// Go module discovery is best-effort: a failure is reported on stderr and
// treated as an empty result.
let go_modules = discover_go_modules(root, follow_symlinks).unwrap_or_else(|e| {
eprintln!("Neo4j: warning: could not discover go.mod modules: {e}");
Vec::new()
});
let go_replaces = discover_go_replaces(root, follow_symlinks).unwrap_or_else(|e| {
eprintln!("Neo4j: warning: could not discover go.mod replace directives: {e}");
Vec::new()
});
let csharp_batch_index = build_csharp_batch_index(files, root);
// A failed health check only produces a warning and the client is still
// used; a failed client construction disables compression for this run.
let compressor_client = if persistence.compressor.enabled {
match CompressorClient::from_config(&persistence.compressor) {
Ok(client) => {
if let Err(e) = client.health_check().await {
eprintln!("RedCompressor: health check failed ({e}); compression may be unavailable");
}
Some(client)
}
Err(e) => {
eprintln!("RedCompressor: failed to create client ({e}); skipping code_bytes");
None
}
}
} else {
None
};
let compressor = compressor_client.as_ref();
// `accumulator` is used by the inline (non-Erlang) persisters;
// `erlang_accumulator` is shared between the queued Erlang futures.
let mut accumulator = BatchAccumulator::new();
let erlang_module_index = build_erlang_module_index(files);
let total_files = files.len();
let mut erlang_futures: FuturesUnordered<_> = FuturesUnordered::new();
let erlang_accumulator: SharedBatchAccumulator =
Arc::new(Mutex::new(BatchAccumulator::new()));
for (idx, file) in files.iter().enumerate() {
let language = file.language.to_string();
let path = neo4j_path_string(root, &file.path);
let project_name = derive_project_name(&file.path, root);
println!(
"Neo4j: processing file {}/{} ({})",
idx + 1,
total_files,
path
);
// Upsert the File node; the same properties are written on create and on match.
let q = query(
"
MERGE (f:File { path: $path })
ON CREATE SET f.language = $language,
f.project_name = $project_name,
f.is_test = $is_test
ON MATCH SET f.language = $language,
f.project_name = $project_name,
f.is_test = $is_test
",
)
.param("path", path.clone())
.param("language", language.clone())
.param("project_name", project_name.clone())
.param("is_test", file.is_test);
graph.run(q).await?;
let source = &file.source;
match file.language {
LanguageId::Java => {
persist_java_structure(
&graph,
&path,
file,
source,
&known_paths,
project_name.clone(),
&mut accumulator,
persistence,
compressor,
)
.await?;
}
LanguageId::CSharp => {
persist_csharp_structure(
&graph,
&path,
file,
source,
project_name.clone(),
&known_paths,
&csharp_batch_index,
&mut accumulator,
compressor,
)
.await?;
}
LanguageId::Erlang => {
// Queue the future instead of awaiting it; it makes progress when
// `erlang_futures` is polled via `next()`.
erlang_futures.push(persist_erlang_structure(
&graph,
file,
path.clone(),
source,
project_name.clone(),
&erlang_module_index,
erlang_accumulator.clone(),
compressor,
));
// Back-pressure: once the queue is full, wait for one future to
// finish before accepting more.
if erlang_futures.len() >= MAX_CONCURRENT_ERLANG_WRITES {
if let Some(res) = erlang_futures.next().await {
res?;
}
flush_shared_accumulator_if_needed(&erlang_accumulator, &graph).await?;
}
}
LanguageId::Go => {
persist_go_structure(
&graph,
&path,
file,
source,
&known_paths,
project_name.clone(),
&mut accumulator,
root,
&go_modules,
&go_replaces,
persistence,
compressor,
)
.await?;
}
// Every other language goes through the generic function persister.
_ => {
persist_non_java_functions(
&graph,
&path,
file,
source,
project_name.clone(),
&known_paths,
&mut accumulator,
persistence,
compressor,
)
.await?;
}
}
// Threshold-based flushes keep both accumulators bounded while iterating.
if accumulator.should_flush() {
accumulator.flush(&graph).await?;
}
flush_shared_accumulator_if_needed(&erlang_accumulator, &graph).await?;
}
// Drain whatever Erlang work is still queued.
while let Some(res) = erlang_futures.next().await {
res?;
flush_shared_accumulator_if_needed(&erlang_accumulator, &graph).await?;
}
// Final unconditional flush of both accumulators.
accumulator.flush(&graph).await?;
flush_shared_accumulator_force(&erlang_accumulator, &graph).await?;
// Link every ApiEndpoint to every ExternalApi with the same non-null norm_path.
let same_api_query = query(
"
MATCH (ep:ApiEndpoint)
MATCH (ext:ExternalApi)
WHERE ep.norm_path IS NOT NULL
AND ext.norm_path IS NOT NULL
AND ep.norm_path = ext.norm_path
MERGE (ep)-[:SAME_API]->(ext)
",
);
graph.run(same_api_query).await?;
println!("Neo4j: finished processing {} files.", total_files);
Ok(())
}
/// Persists the Java-specific structure of one file.
///
/// Written immediately: `Class` and `Function` nodes with their
/// `DECLARES_CLASS` / `DECLARES_FUNCTION` edges, `DEPENDS_ON_FILE` edges for
/// imports that map to a file in `known_paths`, `ApiEndpoint` nodes with
/// `HANDLED_BY` edges for Spring endpoints, and `ExternalApi` nodes for HTTP
/// URLs found in the source.
///
/// Only queued on `accumulator` (written when it is flushed): class-to-class
/// `USES_CLASS` edges (inheritance and injected dependencies),
/// `CALLS_FUNCTION`, `CALLS_EXTERNAL_API` and function-to-class `USES_CLASS`.
///
/// The `File` node for `file_path` must already exist: the class and method
/// queries start with `MATCH (f:File ...)` and do nothing when it is missing.
///
/// # Errors
/// Returns `GraphError` for the first failed query.
async fn persist_java_structure(
graph: &Graph,
file_path: &str,
file: &ParsedFile,
source: &str,
known_paths: &HashSet<String>,
project_name: Option<String>,
accumulator: &mut BatchAccumulator,
persistence: &GraphPersistenceOptions,
compressor: Option<&CompressorClient>,
) -> Result<(), GraphError> {
let package = extract_java_package(source);
let (classes, methods) = extract_java_symbols(&file.tree, source, package.as_deref());
// Spans are looked up by fqn below to obtain the `code_bytes` of each symbol.
let class_spans = extract_java_class_spans(&file.tree, source, package.as_deref());
let method_spans = extract_java_method_body_spans(&file.tree, source, package.as_deref());
emit_limited_parse_warnings(
"Java",
file_path,
extract_java_parse_warnings(&file.tree, source),
persistence.max_parse_warnings_per_file,
);
// Annotation lists keyed by class / method fqn.
let class_ann_map: HashMap<String, Vec<String>> =
extract_java_class_annotations(&file.tree, source, package.as_deref())
.into_iter()
.collect();
let method_ann_map: HashMap<String, Vec<String>> =
extract_java_method_annotations(&file.tree, source, package.as_deref())
.into_iter()
.collect();
// --- Classes -------------------------------------------------------------
for class in &classes {
let annotations = class_ann_map
.get(&class.fqn)
.cloned()
.unwrap_or_default();
let code_bytes = code_bytes_for_span(
compressor,
source,
class_spans.get(&class.fqn).copied(),
LanguageId::Java,
)
.await;
// On match, `coalesce` keeps the stored code_bytes when the new value is null.
let q = query(
"
MATCH (f:File { path: $path })
MERGE (c:Class { fqn: $class_fqn })
ON CREATE SET c.name = $class_name,
c.path = $path,
c.project_name = $project_name,
c.annotations = $annotations,
c.code_bytes = $code_bytes
ON MATCH SET c.name = $class_name,
c.path = $path,
c.project_name = $project_name,
c.annotations = $annotations,
c.code_bytes = coalesce($code_bytes, c.code_bytes)
MERGE (f)-[:DECLARES_CLASS]->(c)
",
)
.param("path", file_path.to_string())
.param("class_fqn", class.fqn.clone())
.param("class_name", class.name.clone())
.param("project_name", project_name.clone())
.param("annotations", annotations)
.param(props::CODE_BYTES, code_bytes);
graph.run(q).await?;
}
// --- Methods -------------------------------------------------------------
for func in &methods {
let fn_annotations = method_ann_map
.get(&func.fqn)
.cloned()
.unwrap_or_default();
let code_bytes = code_bytes_for_span(
compressor,
source,
method_spans.get(&func.fqn).copied(),
LanguageId::Java,
)
.await;
match &func.class_fqn {
// Method with an owning class: declared by both the file and the class.
Some(class_fqn) => {
let q = query(
"
MATCH (f:File { path: $path })
MERGE (cls:Class { fqn: $class_fqn })
MERGE (fn:Function { fqn: $fn_fqn })
ON CREATE SET fn.name = $fn_name,
fn.path = $path,
fn.project_name = $project_name,
fn.return_type = $return_type,
fn.param_types = $param_types,
fn.param_count = $param_count,
fn.annotations = $fn_annotations,
fn.code_bytes = $code_bytes
ON MATCH SET fn.name = $fn_name,
fn.path = $path,
fn.project_name = $project_name,
fn.return_type = coalesce($return_type, fn.return_type),
fn.param_types = coalesce($param_types, fn.param_types),
fn.param_count = coalesce($param_count, fn.param_count),
fn.annotations = $fn_annotations,
fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
MERGE (f)-[:DECLARES_FUNCTION]->(fn)
MERGE (cls)-[:DECLARES_FUNCTION]->(fn)
",
)
.param("path", file_path.to_string())
.param("class_fqn", class_fqn.clone())
.param("fn_fqn", func.fqn.clone())
.param("fn_name", func.name.clone())
.param("project_name", project_name.clone())
.param("return_type", func.return_type.clone())
.param("param_types", func.param_types.clone())
.param("param_count", func.param_count as i64)
.param("fn_annotations", fn_annotations)
.param(props::CODE_BYTES, code_bytes.clone());
graph.run(q).await?;
}
// Method without an owning class: declared by the file only.
None => {
let q = query(
"
MATCH (f:File { path: $path })
MERGE (fn:Function { fqn: $fn_fqn })
ON CREATE SET fn.name = $fn_name,
fn.path = $path,
fn.project_name = $project_name,
fn.return_type = $return_type,
fn.param_types = $param_types,
fn.param_count = $param_count,
fn.annotations = $fn_annotations,
fn.code_bytes = $code_bytes
ON MATCH SET fn.name = $fn_name,
fn.path = $path,
fn.project_name = $project_name,
fn.return_type = coalesce($return_type, fn.return_type),
fn.param_types = coalesce($param_types, fn.param_types),
fn.param_count = coalesce($param_count, fn.param_count),
fn.annotations = $fn_annotations,
fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
MERGE (f)-[:DECLARES_FUNCTION]->(fn)
",
)
.param("path", file_path.to_string())
.param("fn_fqn", func.fqn.clone())
.param("fn_name", func.name.clone())
.param("project_name", project_name.clone())
.param("return_type", func.return_type.clone())
.param("param_types", func.param_types.clone())
.param("param_count", func.param_count as i64)
.param("fn_annotations", fn_annotations)
.param(props::CODE_BYTES, code_bytes);
graph.run(q).await?;
}
}
}
// --- Queued relationships --------------------------------------------------
// Inheritance and injected dependencies are both recorded as class-to-class
// USES_CLASS edges.
for (derived, base) in extract_java_inheritance_edges(&file.tree, source, package.as_deref()) {
accumulator.add_class_uses_class(derived, base);
}
for (cls, dep) in extract_java_injected_dependencies(&file.tree, source, package.as_deref()) {
accumulator.add_class_uses_class(cls, dep);
}
let calls = extract_java_calls(&file.tree, source, package.as_deref());
for (caller_fqn, callee_fqn) in calls {
accumulator.add_calls_function(caller_fqn, callee_fqn);
}
// --- File dependencies -----------------------------------------------------
let internal_imports = extract_internal_java_imports(source);
for import_fqn in internal_imports {
if let Some(dep_path) = map_import_to_project_path(file_path, &import_fqn) {
// Only link to files that are part of this run.
if !known_paths.contains(&dep_path) {
continue;
}
let dep_query = query(
"
MERGE (src:File { path: $src_path })
MERGE (dst:File { path: $dst_path })
MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
",
)
.param("src_path", file_path.to_string())
.param("dst_path", dep_path.clone());
graph.run(dep_query).await?;
}
}
// --- Spring endpoints ------------------------------------------------------
let endpoints = extract_java_spring_endpoints(source);
for (http_methods, path_template, handler_name) in endpoints {
let norm_path = normalize_api_path(&path_template);
// On match, `methods` is overwritten while the other properties keep any
// value that is already stored.
let api_query = query(
"
MERGE (api:ApiEndpoint { path: $path })
ON CREATE SET api.methods = $methods,
api.protocol = 'http',
api.framework = 'spring',
api.project_name = $project_name,
api.norm_path = $norm_path
ON MATCH SET api.methods = $methods,
api.protocol = coalesce(api.protocol, 'http'),
api.framework = coalesce(api.framework, 'spring'),
api.project_name = coalesce(api.project_name, $project_name),
api.norm_path = coalesce(api.norm_path, $norm_path)
",
)
.param("path", path_template.clone())
.param("methods", http_methods.clone())
.param("project_name", project_name.clone())
.param("norm_path", norm_path.clone());
graph.run(api_query).await?;
// Handlers are matched by simple name, so every method in this file with
// that name (e.g. overloads) is linked to the endpoint.
for func in &methods {
if func.name != handler_name {
continue;
}
let rel_query = query(
"
MERGE (fn:Function { fqn: $fn_fqn })
MERGE (api:ApiEndpoint { path: $path })
MERGE (api)-[:HANDLED_BY]->(fn)
",
)
.param("fn_fqn", func.fqn.clone())
.param("path", path_template.clone());
graph.run(rel_query).await?;
}
}
// --- External HTTP APIs ------------------------------------------------------
let external_urls = extract_external_http_urls(source);
for full_url in external_urls {
let (protocol_opt, host, path) = split_url_protocol_host_and_path(&full_url);
// URLs without an explicit protocol are treated as plain http.
let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
let base_url = format!("{protocol}://{host}");
// The host doubles as both the `name` and the `provider` of the node.
let name = host.clone();
let norm_path = normalize_api_path(&path);
let ext_query = query(
"
MERGE (ext:ExternalApi { base_url: $base_url, norm_path: $norm_path })
ON CREATE SET ext.name = $name,
ext.path = $path,
ext.protocol = $protocol,
ext.provider = $provider,
ext.project_name = $project_name
ON MATCH SET ext.name = coalesce(ext.name, $name),
ext.path = coalesce(ext.path, $path),
ext.protocol = coalesce(ext.protocol, $protocol),
ext.provider = coalesce(ext.provider, $provider),
ext.project_name = coalesce(ext.project_name, $project_name)
",
)
.param("name", name.clone())
.param("base_url", base_url.clone())
.param("path", path.clone())
.param("norm_path", norm_path.clone())
.param("protocol", protocol.clone())
.param("provider", name.clone())
.param("project_name", project_name.clone());
graph.run(ext_query).await?;
// NOTE(review): every method of this file is linked to every URL found in
// the file, not only the methods that contain the URL — confirm this
// file-level granularity is intended.
for func in &methods {
accumulator.add_calls_external_api(
func.fqn.clone(),
base_url.clone(),
norm_path.clone(),
);
}
}
// --- Function-to-class usage -----------------------------------------------
let used_classes = extract_java_used_classes(&file.tree, source, package.as_deref());
for (fn_fqn, class_fqn) in used_classes {
accumulator.add_uses_class(fn_fqn, class_fqn);
}
Ok(())
}
async fn persist_csharp_structure(
graph: &Graph,
file_path: &str,
file: &ParsedFile,
source: &str,
project_name: Option<String>,
known_paths: &HashSet<String>,
csharp_index: &CSharpBatchIndex,
accumulator: &mut BatchAccumulator,
compressor: Option<&CompressorClient>,
) -> Result<(), GraphError> {
let language = file.language.to_string();
let namespace = extract_csharp_namespace(&file.tree, source);
let using_summary = extract_csharp_using_summary(&file.tree, source);
let (classes, methods, property_symbols) = extract_csharp_symbols(&file.tree, source);
let class_spans = extract_csharp_class_spans(&file.tree, source);
let property_spans = extract_csharp_property_spans(&file.tree, source);
let method_spans = extract_csharp_method_body_spans_map(&file.tree, source, namespace.as_deref());
for chunk in classes.chunks(CSHARP_NODE_BATCH_FLUSH_THRESHOLD.max(1)) {
let class_fqns: Vec<String> = chunk.iter().map(|c| c.fqn.clone()).collect();
let class_names: Vec<String> = chunk.iter().map(|c| c.name.clone()).collect();
let class_kinds: Vec<String> = chunk
.iter()
.map(|c| c.kind.unwrap_or("class").to_string())
.collect();
let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(chunk.len());
for c in chunk {
code_bytes_list.push(
code_bytes_for_span(
compressor,
source,
class_spans.get(&c.fqn).copied(),
LanguageId::CSharp,
)
.await,
);
}
let q = query(
"
UNWIND range(0, size($class_fqns) - 1) AS i
WITH $file_path AS path, $class_fqns[i] AS class_fqn, $class_names[i] AS class_name,
$class_kinds[i] AS class_kind, $project_name AS project_name, $language AS language,
$code_bytes_list[i] AS code_bytes
MATCH (f:File { path: path })
MERGE (c:Class { fqn: class_fqn })
ON CREATE SET c.name = class_name, c.path = path, c.project_name = project_name,
c.language = language, c.kind = class_kind, c.code_bytes = code_bytes
ON MATCH SET c.name = class_name, c.project_name = project_name, c.language = language,
c.kind = class_kind, c.code_bytes = coalesce(code_bytes, c.code_bytes)
MERGE (f)-[:DECLARES_CLASS]->(c)
",
)
.param("file_path", file_path.to_string())
.param("class_fqns", class_fqns)
.param("class_names", class_names)
.param("class_kinds", class_kinds)
.param("project_name", project_name.clone())
.param("language", language.clone())
.param("code_bytes_list", code_bytes_list);
graph.run(q).await?;
}
for chunk in property_symbols.chunks(CSHARP_NODE_BATCH_FLUSH_THRESHOLD.max(1)) {
let class_fqns: Vec<String> = chunk.iter().map(|p| p.class_fqn.clone()).collect();
let prop_fqns: Vec<String> = chunk.iter().map(|p| p.fqn.clone()).collect();
let prop_names: Vec<String> = chunk.iter().map(|p| p.name.clone()).collect();
let decl_types: Vec<Option<String>> = chunk.iter().map(|p| p.declared_type.clone()).collect();
let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(chunk.len());
for p in chunk {
code_bytes_list.push(
code_bytes_for_span(
compressor,
source,
property_spans.get(&p.fqn).copied(),
LanguageId::CSharp,
)
.await,
);
}
let q = query(
"
UNWIND range(0, size($prop_fqns) - 1) AS i
WITH $class_fqns[i] AS class_fqn, $prop_fqns[i] AS prop_fqn, $prop_names[i] AS prop_name,
$decl_types[i] AS declared_type, $file_path AS path,
$project_name AS project_name, $language AS language,
$code_bytes_list[i] AS code_bytes
MATCH (c:Class { fqn: class_fqn })
MERGE (p:Property { fqn: prop_fqn })
ON CREATE SET p.name = prop_name, p.path = path, p.project_name = project_name,
p.language = language, p.declared_type = declared_type, p.code_bytes = code_bytes
ON MATCH SET p.name = prop_name, p.project_name = project_name, p.language = language,
p.declared_type = coalesce(declared_type, p.declared_type),
p.code_bytes = coalesce(code_bytes, p.code_bytes)
MERGE (c)-[:DECLARES_PROPERTY]->(p)
",
)
.param("class_fqns", class_fqns)
.param("prop_fqns", prop_fqns)
.param("prop_names", prop_names)
.param("decl_types", decl_types)
.param("file_path", file_path.to_string())
.param("project_name", project_name.clone())
.param("language", language.clone())
.param("code_bytes_list", code_bytes_list);
graph.run(q).await?;
}
let methods_with_class: Vec<&FunctionSymbol> =
methods.iter().filter(|f| f.class_fqn.is_some()).collect();
for chunk in methods_with_class.chunks(CSHARP_NODE_BATCH_FLUSH_THRESHOLD.max(1)) {
let class_fqns: Vec<String> = chunk
.iter()
.map(|f| f.class_fqn.clone().unwrap_or_default())
.collect();
let fn_fqns: Vec<String> = chunk.iter().map(|f| f.fqn.clone()).collect();
let fn_names: Vec<String> = chunk.iter().map(|f| f.name.clone()).collect();
let return_types: Vec<Option<String>> = chunk.iter().map(|f| f.return_type.clone()).collect();
let param_types_list: Vec<Vec<String>> = chunk.iter().map(|f| f.param_types.clone()).collect();
let param_counts: Vec<i64> = chunk.iter().map(|f| f.param_count as i64).collect();
let modifiers_list: Vec<Vec<String>> = chunk.iter().map(|f| f.modifiers.clone()).collect();
let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(chunk.len());
for f in chunk {
code_bytes_list.push(
code_bytes_for_span(
compressor,
source,
method_spans.get(&f.fqn).copied(),
LanguageId::CSharp,
)
.await,
);
}
let q = query(
"
UNWIND range(0, size($fn_fqns) - 1) AS i
WITH $file_path AS path, $class_fqns[i] AS class_fqn, $fn_fqns[i] AS fn_fqn,
$fn_names[i] AS fn_name, $return_types[i] AS return_type,
$param_types_list[i] AS param_types, $param_counts[i] AS param_count,
$modifiers_list[i] AS modifiers, $project_name AS project_name, $language AS language,
$code_bytes_list[i] AS code_bytes
MATCH (f:File { path: path })
MERGE (cls:Class { fqn: class_fqn })
MERGE (fn:Function { fqn: fn_fqn })
ON CREATE SET fn.name = fn_name, fn.path = path, fn.project_name = project_name,
fn.language = language, fn.return_type = return_type,
fn.param_types = param_types, fn.param_count = param_count,
fn.modifiers = modifiers, fn.code_bytes = code_bytes
ON MATCH SET fn.name = fn_name, fn.project_name = project_name, fn.language = language,
fn.return_type = coalesce(return_type, fn.return_type),
fn.param_types = coalesce(param_types, fn.param_types),
fn.param_count = coalesce(param_count, fn.param_count),
fn.modifiers = coalesce(modifiers, fn.modifiers),
fn.code_bytes = coalesce(code_bytes, fn.code_bytes)
MERGE (f)-[:DECLARES_FUNCTION]->(fn)
MERGE (cls)-[:DECLARES_FUNCTION]->(fn)
",
)
.param("file_path", file_path.to_string())
.param("class_fqns", class_fqns)
.param("fn_fqns", fn_fqns)
.param("fn_names", fn_names)
.param("return_types", return_types)
.param("param_types_list", param_types_list)
.param("param_counts", param_counts)
.param("modifiers_list", modifiers_list)
.param("project_name", project_name.clone())
.param("language", language.clone())
.param("code_bytes_list", code_bytes_list);
graph.run(q).await?;
}
let methods_top: Vec<&FunctionSymbol> = methods.iter().filter(|f| f.class_fqn.is_none()).collect();
for chunk in methods_top.chunks(CSHARP_NODE_BATCH_FLUSH_THRESHOLD.max(1)) {
let fn_fqns: Vec<String> = chunk.iter().map(|f| f.fqn.clone()).collect();
let fn_names: Vec<String> = chunk.iter().map(|f| f.name.clone()).collect();
let return_types: Vec<Option<String>> = chunk.iter().map(|f| f.return_type.clone()).collect();
let param_types_list: Vec<Vec<String>> = chunk.iter().map(|f| f.param_types.clone()).collect();
let param_counts: Vec<i64> = chunk.iter().map(|f| f.param_count as i64).collect();
let modifiers_list: Vec<Vec<String>> = chunk.iter().map(|f| f.modifiers.clone()).collect();
let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(chunk.len());
for f in chunk {
code_bytes_list.push(
code_bytes_for_span(
compressor,
source,
method_spans.get(&f.fqn).copied(),
LanguageId::CSharp,
)
.await,
);
}
let q = query(
"
UNWIND range(0, size($fn_fqns) - 1) AS i
WITH $file_path AS path, $fn_fqns[i] AS fn_fqn, $fn_names[i] AS fn_name,
$return_types[i] AS return_type, $param_types_list[i] AS param_types,
$param_counts[i] AS param_count, $modifiers_list[i] AS modifiers,
$project_name AS project_name, $language AS language,
$code_bytes_list[i] AS code_bytes
MATCH (f:File { path: path })
MERGE (fn:Function { fqn: fn_fqn })
ON CREATE SET fn.name = fn_name, fn.path = path, fn.project_name = project_name,
fn.language = language, fn.return_type = return_type,
fn.param_types = param_types, fn.param_count = param_count,
fn.modifiers = modifiers, fn.code_bytes = code_bytes
ON MATCH SET fn.name = fn_name, fn.project_name = project_name, fn.language = language,
fn.return_type = coalesce(return_type, fn.return_type),
fn.param_types = coalesce(param_types, fn.param_types),
fn.param_count = coalesce(param_count, fn.param_count),
fn.modifiers = coalesce(modifiers, fn.modifiers),
fn.code_bytes = coalesce(code_bytes, fn.code_bytes)
MERGE (f)-[:DECLARES_FUNCTION]->(fn)
",
)
.param("file_path", file_path.to_string())
.param("fn_fqns", fn_fqns)
.param("fn_names", fn_names)
.param("return_types", return_types)
.param("param_types_list", param_types_list)
.param("param_counts", param_counts)
.param("modifiers_list", modifiers_list)
.param("project_name", project_name.clone())
.param("language", language.clone())
.param("code_bytes_list", code_bytes_list);
graph.run(q).await?;
}
let endpoints = extract_csharp_api_endpoints_from_tree(&file.tree, source);
for (methods_http, path_template, handler_name) in endpoints {
let norm_path = normalize_api_path(&path_template);
let api_query = query(
"
MERGE (api:ApiEndpoint { path: $path })
ON CREATE SET api.methods = $methods,
api.protocol = 'http',
api.framework = 'aspnet',
api.project_name = $project_name,
api.norm_path = $norm_path
ON MATCH SET api.methods = $methods,
api.protocol = coalesce(api.protocol, 'http'),
api.framework = coalesce(api.framework, 'aspnet'),
api.project_name = coalesce(api.project_name, $project_name),
api.norm_path = coalesce(api.norm_path, $norm_path)
",
)
.param("path", path_template.clone())
.param("methods", methods_http.clone())
.param("project_name", project_name.clone())
.param("norm_path", norm_path.clone());
graph.run(api_query).await?;
for func in &methods {
if func.name != handler_name {
continue;
}
let rel_query = query(
"
MERGE (fn:Function { fqn: $fn_fqn })
MERGE (api:ApiEndpoint { path: $path })
MERGE (api)-[:HANDLED_BY]->(fn)
",
)
.param("fn_fqn", func.fqn.clone())
.param("path", path_template.clone());
graph.run(rel_query).await?;
}
}
let url_spans = extract_csharp_external_http_urls_with_spans(&file.tree, source);
let method_spans = csharp_method_body_spans(&file.tree, source, namespace.as_deref());
let mut spans_by_fqn: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
for (fqn, lo, hi) in method_spans {
spans_by_fqn.entry(fqn).or_default().push((lo, hi));
}
for (full_url, u_start, u_end) in url_spans {
let (protocol_opt, host, path) = split_url_protocol_host_and_path(&full_url);
let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
let base_url = format!("{protocol}://{host}");
let name = host.clone();
let norm_path = normalize_api_path(&path);
let ext_query = query(
"
MERGE (ext:ExternalApi { base_url: $base_url, norm_path: $norm_path })
ON CREATE SET ext.name = $name,
ext.path = $path,
ext.protocol = $protocol,
ext.provider = $provider,
ext.project_name = $project_name
ON MATCH SET ext.name = coalesce(ext.name, $name),
ext.path = coalesce(ext.path, $path),
ext.protocol = coalesce(ext.protocol, $protocol),
ext.provider = coalesce(ext.provider, $provider),
ext.project_name = coalesce(ext.project_name, $project_name)
",
)
.param("name", name.clone())
.param("base_url", base_url.clone())
.param("path", path.clone())
.param("norm_path", norm_path.clone())
.param("protocol", protocol.clone())
.param("provider", name.clone())
.param("project_name", project_name.clone());
graph.run(ext_query).await?;
for func in &methods {
let Some(ranges) = spans_by_fqn.get(&func.fqn) else {
continue;
};
if !ranges
.iter()
.any(|(lo, hi)| *lo <= u_start && u_end <= *hi)
{
continue;
}
accumulator.add_calls_external_api(
func.fqn.clone(),
base_url.clone(),
norm_path.clone(),
);
}
}
for ns in &using_summary.namespace_imports {
let Some(dep_paths) = csharp_index.namespace_to_paths.get(ns) else {
continue;
};
for dep_path in dep_paths {
if dep_path == file_path || !known_paths.contains(dep_path) {
continue;
}
let dep_query = query(
"
MERGE (src:File { path: $src_path })
MERGE (dst:File { path: $dst_path })
MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
",
)
.param("src_path", file_path.to_string())
.param("dst_path", dep_path.clone());
graph.run(dep_query).await?;
}
}
for (derived, base) in extract_csharp_class_inheritance_edges(
&file.tree,
source,
namespace.as_deref(),
&using_summary,
csharp_index,
) {
accumulator.add_class_uses_class(derived, base);
}
let used_classes = extract_csharp_used_classes(
&file.tree,
source,
namespace.as_deref(),
&using_summary,
csharp_index,
);
for (fn_fqn, class_fqn) in used_classes {
accumulator.add_uses_class(fn_fqn, class_fqn);
}
let calls = extract_csharp_calls(
&file.tree,
source,
namespace.as_deref(),
&using_summary,
csharp_index,
);
for (caller_fqn, callee_fqn) in calls {
accumulator.add_calls_function(caller_fqn, callee_fqn);
}
Ok(())
}
async fn persist_erlang_structure(
graph: &Graph,
file: &ParsedFile,
file_path: String,
source: &str,
project_name: Option<String>,
erlang_module_index: &HashMap<String, ErlangModuleSnapshot>,
erlang_accumulator: SharedBatchAccumulator,
compressor: Option<&CompressorClient>,
) -> Result<(), GraphError> {
let language = file.language.to_string();
let module_name = resolve_erlang_module_name(&file.path, &file.tree, source);
let erlang_meta = extract_erlang_behaviour_metadata_from_tree(&file.tree, source);
if let Some(module_name) = module_name.as_ref() {
let module_code_bytes = match compressor {
Some(client) => compress_full_source(source, LanguageId::Erlang, client).await,
None => None,
};
let module_query = query(
"
MATCH (f:File { path: $path })
MERGE (m:Module { name: $module_name, path: $path })
ON CREATE SET m.language = $language,
m.project_name = $project_name,
m.code_bytes = $code_bytes
ON MATCH SET m.language = $language,
m.project_name = $project_name,
m.code_bytes = coalesce($code_bytes, m.code_bytes)
MERGE (f)-[:DECLARES_MODULE]->(m)
",
)
.param("path", file_path.to_string())
.param("module_name", module_name.clone())
.param("language", language.clone())
.param("project_name", project_name.clone())
.param(props::CODE_BYTES, module_code_bytes);
graph.run(module_query).await?;
}
let functions = if let Some(module_name) = module_name.as_ref() {
extract_erlang_functions(module_name, &file.tree, source)
} else {
Vec::new()
};
let function_spans = module_name.as_ref().map(|module_name| {
extract_erlang_function_spans(module_name, &file.tree, source)
}).unwrap_or_default();
if !functions.is_empty() {
let module_name = module_name.as_ref().expect("module must exist when functions exist");
let mut fn_fqns: Vec<String> = Vec::with_capacity(functions.len());
let mut fn_names: Vec<String> = Vec::with_capacity(functions.len());
let mut fn_arities: Vec<i64> = Vec::with_capacity(functions.len());
let mut code_bytes_list: Vec<Option<Vec<u8>>> = Vec::with_capacity(functions.len());
for (fun_name, arity, fqn) in &functions {
fn_fqns.push(fqn.clone());
fn_names.push(fun_name.clone());
fn_arities.push(*arity as i64);
code_bytes_list.push(
code_bytes_for_span(
compressor,
source,
function_spans.get(fqn).copied(),
LanguageId::Erlang,
)
.await,
);
}
let fun_query = query(
"
MATCH (f:File { path: $path })
MATCH (m:Module { name: $module_name, path: $path })
WITH f, m,
$fn_fqns AS fn_fqns,
$fn_names AS fn_names,
$fn_arities AS fn_arities,
$code_bytes_list AS code_bytes_list,
$language AS language,
$path AS path,
$project_name AS project_name
UNWIND range(0, size(fn_fqns) - 1) AS idx
WITH f, m, language, path, project_name,
fn_fqns[idx] AS fn_fqn,
fn_names[idx] AS fn_name,
fn_arities[idx] AS arity,
code_bytes_list[idx] AS code_bytes
MERGE (fn:Function { fqn: fn_fqn })
ON CREATE SET fn.name = fn_name,
fn.path = path,
fn.language = language,
fn.project_name = project_name,
fn.arity = arity,
fn.code_bytes = code_bytes
ON MATCH SET fn.name = fn_name,
fn.path = path,
fn.language = language,
fn.project_name = project_name,
fn.arity = arity,
fn.code_bytes = coalesce(code_bytes, fn.code_bytes)
MERGE (f)-[:DECLARES_FUNCTION]->(fn)
MERGE (m)-[:DECLARES_FUNCTION]->(fn)
",
)
.param("path", file_path.to_string())
.param("module_name", module_name.clone())
.param("language", language.clone())
.param("fn_fqns", fn_fqns)
.param("fn_names", fn_names)
.param("fn_arities", fn_arities)
.param("code_bytes_list", code_bytes_list)
.param("project_name", project_name.clone());
graph.run(fun_query).await?;
}
let callback_contracts = collect_callback_contracts_for_module(
module_name.as_deref(),
&erlang_meta.behaviour_usages,
&erlang_meta.declared_callbacks,
&erlang_meta.optional_callbacks,
);
if let Some(module_name) = module_name.as_ref() {
for behaviour in &erlang_meta.behaviour_usages {
let behaviour_query = query(
"
MATCH (m:Module { name: $module_name, path: $path })
MERGE (b:Behaviour { name: $behaviour })
ON CREATE SET b.language = $language,
b.project_name = $project_name
ON MATCH SET b.language = coalesce(b.language, $language),
b.project_name = coalesce(b.project_name, $project_name)
MERGE (m)-[:IMPLEMENTS_BEHAVIOUR]->(b)
",
)
.param("module_name", module_name.clone())
.param("path", file_path.to_string())
.param("behaviour", behaviour.clone())
.param("language", language.clone())
.param("project_name", project_name.clone());
graph.run(behaviour_query).await?;
let dep_path = guess_erlang_file_path_from_module(&file_path, behaviour);
let module_dep_query = query(
"
MATCH (m:Module { name: $module_name, path: $path })
MERGE (dst:File { path: $dst_path })
MERGE (m)-[:DEPENDS_ON_FILE]->(dst)
",
)
.param("module_name", module_name.clone())
.param("path", file_path.to_string())
.param("dst_path", dep_path);
graph.run(module_dep_query).await?;
}
if !erlang_meta.declared_callbacks.is_empty() {
let file_declares_behaviour_query = query(
"
MATCH (f:File { path: $path })
MERGE (b:Behaviour { name: $behaviour })
ON CREATE SET b.path = $path,
b.language = $language,
b.project_name = $project_name
ON MATCH SET b.path = coalesce(b.path, $path),
b.language = coalesce(b.language, $language),
b.project_name = coalesce(b.project_name, $project_name)
MERGE (f)-[:DECLARES_BEHAVIOUR]->(b)
",
)
.param("path", file_path.to_string())
.param("behaviour", module_name.clone())
.param("language", language.clone())
.param("project_name", project_name.clone());
graph.run(file_declares_behaviour_query).await?;
}
for parent_behaviour in &erlang_meta.behaviour_extensions {
let extends_query = query(
"
MERGE (child:Behaviour { name: $child })
MERGE (parent:Behaviour { name: $parent })
MERGE (child)-[:EXTENDS_BEHAVIOUR]->(parent)
",
)
.param("child", module_name.clone())
.param("parent", parent_behaviour.clone());
graph.run(extends_query).await?;
}
}
for contract in &callback_contracts {
let callback_fqn = format!(
"{behaviour}:{name}/{arity}",
behaviour = contract.behaviour,
name = contract.name,
arity = contract.arity
);
let callback_query = query(
"
MERGE (b:Behaviour { name: $behaviour })
MERGE (cb:Callback { fqn: $cb_fqn })
ON CREATE SET cb.name = $cb_name,
cb.arity = $cb_arity,
cb.optional = $cb_optional,
cb.language = $language,
cb.project_name = $project_name
ON MATCH SET cb.name = coalesce(cb.name, $cb_name),
cb.arity = coalesce(cb.arity, $cb_arity),
cb.optional = $cb_optional,
cb.language = coalesce(cb.language, $language),
cb.project_name = coalesce(cb.project_name, $project_name)
MERGE (b)-[:DECLARES_CALLBACK]->(cb)
",
)
.param("behaviour", contract.behaviour.clone())
.param("cb_fqn", callback_fqn.clone())
.param("cb_name", contract.name.clone())
.param("cb_arity", contract.arity as i64)
.param("cb_optional", contract.optional)
.param("language", language.clone())
.param("project_name", project_name.clone());
graph.run(callback_query).await?;
}
let function_by_sig: HashMap<(String, u32), String> = functions
.iter()
.map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
.collect();
for contract in &callback_contracts {
let key = (contract.name.clone(), contract.arity);
let Some(fn_fqn) = function_by_sig.get(&key) else {
continue;
};
let callback_fqn = format!(
"{behaviour}:{name}/{arity}",
behaviour = contract.behaviour,
name = contract.name,
arity = contract.arity
);
let implements_query = query(
"
MERGE (fn:Function { fqn: $fn_fqn })
MERGE (cb:Callback { fqn: $cb_fqn })
MERGE (fn)-[:IMPLEMENTS_CALLBACK]->(cb)
",
)
.param("fn_fqn", fn_fqn.clone())
.param("cb_fqn", callback_fqn.clone());
graph.run(implements_query).await?;
}
for (name, arity) in erlang_meta.overridden_callbacks {
let key = (name.clone(), arity);
let Some(fn_fqn) = function_by_sig.get(&key) else {
continue;
};
for contract in callback_contracts
.iter()
.filter(|c| c.name == name && c.arity == arity)
{
let callback_fqn = format!(
"{behaviour}:{name}/{arity}",
behaviour = contract.behaviour,
name = contract.name,
arity = contract.arity
);
let overrides_query = query(
"
MERGE (fn:Function { fqn: $fn_fqn })
MERGE (cb:Callback { fqn: $cb_fqn })
MERGE (fn)-[:OVERRIDES_CALLBACK]->(cb)
",
)
.param("fn_fqn", fn_fqn.clone())
.param("cb_fqn", callback_fqn);
graph.run(overrides_query).await?;
}
}
let endpoints = extract_erlang_api_endpoints(&file.tree, source);
for (methods, path_template, handler_module) in endpoints {
let norm_path = normalize_api_path(&path_template);
let api_query = query(
"
MERGE (api:ApiEndpoint { path: $path })
ON CREATE SET api.methods = $methods,
api.protocol = 'http',
api.framework = 'cowboy',
api.project_name = $project_name,
api.norm_path = $norm_path
ON MATCH SET api.methods = $methods,
api.protocol = coalesce(api.protocol, 'http'),
api.framework = coalesce(api.framework, 'cowboy'),
api.project_name = coalesce(api.project_name, $project_name),
api.norm_path = coalesce(api.norm_path, $norm_path)
",
)
.param("path", path_template.clone())
.param("methods", methods.clone())
.param("project_name", project_name.clone())
.param("norm_path", norm_path.clone());
graph.run(api_query).await?;
let candidate_fqns = select_endpoint_handler_fqns(&handler_module, erlang_module_index);
for fqn in candidate_fqns {
let rel_query = query(
"
MERGE (fn:Function { fqn: $fn_fqn })
MERGE (api:ApiEndpoint { path: $path })
MERGE (api)-[:HANDLED_BY]->(fn)
",
)
.param("fn_fqn", fqn)
.param("path", path_template.clone());
graph.run(rel_query).await?;
}
}
let external_urls = extract_external_http_urls_from_tree(&file.tree, source);
for full_url in external_urls {
let (protocol_opt, host, path) = split_url_protocol_host_and_path(&full_url);
let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
let base_url = format!("{protocol}://{host}");
let name = host.clone();
let norm_path = normalize_api_path(&path);
let ext_query = query(
"
MERGE (ext:ExternalApi { base_url: $base_url, norm_path: $norm_path })
ON CREATE SET ext.name = $name,
ext.path = $path,
ext.protocol = $protocol,
ext.provider = $provider,
ext.project_name = $project_name
ON MATCH SET ext.name = coalesce(ext.name, $name),
ext.path = coalesce(ext.path, $path),
ext.protocol = coalesce(ext.protocol, $protocol),
ext.provider = coalesce(ext.provider, $provider),
ext.project_name = coalesce(ext.project_name, $project_name)
",
)
.param("name", name.clone())
.param("base_url", base_url.clone())
.param("path", path.clone())
.param("norm_path", norm_path.clone())
.param("protocol", protocol.clone())
.param("provider", name.clone())
.param("project_name", project_name.clone());
graph.run(ext_query).await?;
if !functions.is_empty() {
let mut guard = erlang_accumulator.lock().await;
for (_fun_name, _arity, fqn) in &functions {
guard.add_calls_external_api(fqn.clone(), base_url.clone(), norm_path.clone());
}
}
}
let function_by_sig: HashMap<(String, u32), String> = functions
.iter()
.map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
.collect();
let call_edges: Vec<(String, String)> = extract_erlang_call_edges(
&file.tree,
source,
module_name.as_deref(),
&function_by_sig,
)
.into_iter()
.collect();
if !call_edges.is_empty() {
let mut guard = erlang_accumulator.lock().await;
for (caller_fqn, callee_fqn) in call_edges {
guard.add_calls_function(caller_fqn, callee_fqn);
}
}
let called_modules = extract_erlang_called_modules_from_tree(&file.tree, source);
for callee_mod in called_modules {
if module_name.as_deref() == Some(callee_mod.as_str()) {
continue;
}
let dep_path = guess_erlang_file_path_from_module(&file_path, &callee_mod);
let dep_query = query(
"
MERGE (src:File { path: $src_path })
MERGE (dst:File { path: $dst_path })
MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
",
)
.param("src_path", file_path.to_string())
.param("dst_path", dep_path);
graph.run(dep_query).await?;
if let Some(module_name) = module_name.as_ref() {
let module_dep_query = query(
"
MATCH (m:Module { name: $module_name, path: $path })
MERGE (dst:File { path: $dst_path })
MERGE (m)-[:DEPENDS_ON_FILE]->(dst)
",
)
.param("module_name", module_name.clone())
.param("path", file_path.to_string())
.param(
"dst_path",
guess_erlang_file_path_from_module(&file_path, &callee_mod),
);
graph.run(module_dep_query).await?;
}
}
Ok(())
}
/// Persists functions, file dependencies and intra-file call edges for one
/// non-Java source file.
///
/// Steps, in order:
/// 1. For Python and JS/TS/TSX files, parse warnings extracted from the tree
///    are handed to `emit_limited_parse_warnings` together with
///    `persistence.max_parse_warnings_per_file`.
/// 2. Every extracted function is merged as a `Function` node and linked from
///    the file's `File` node via `DECLARES_FUNCTION` (one statement per function).
/// 3. For Python, JS/TS/TSX and Rust, imports that resolve to a path in
///    `known_paths` become `DEPENDS_ON_FILE` edges, and intra-file calls are
///    pushed onto `accumulator`. Other languages get step 2 only.
///
/// # Parameters
/// * `graph` - Neo4j handle on which the Cypher statements are run.
/// * `file_path` - value used for the `path` property of nodes.
/// * `file` - parsed file; its `language` and `tree` are read.
/// * `source` - source text that `file.tree` is read against.
/// * `project_name` - optional project name stored on `Function` nodes.
/// * `known_paths` - paths that imports may resolve to.
/// * `accumulator` - receives the intra-file call edges.
/// * `persistence` - supplies `verbose_imports` and the parse-warning limit.
/// * `compressor` - optional client used to produce `code_bytes` payloads.
///
/// # Errors
/// Returns early with the error of the first `graph.run` call that fails.
async fn persist_non_java_functions(
graph: &Graph,
file_path: &str,
file: &ParsedFile,
source: &str,
project_name: Option<String>,
known_paths: &HashSet<String>,
accumulator: &mut BatchAccumulator,
persistence: &GraphPersistenceOptions,
compressor: Option<&CompressorClient>,
) -> Result<(), GraphError> {
let language = file.language.to_string();
// Parse warnings are only extracted for Python and the JS/TS family.
match file.language {
LanguageId::Python => {
emit_limited_parse_warnings(
"Python",
file_path,
extract_python_parse_warnings(&file.tree, source),
persistence.max_parse_warnings_per_file,
);
}
LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
emit_limited_parse_warnings(
"JS/TS",
file_path,
extract_js_ts_parse_warnings(&file.tree, source),
persistence.max_parse_warnings_per_file,
);
}
_ => {}
}
let functions = extract_non_java_function_symbols(file, source, file_path);
let function_spans = extract_non_java_function_body_spans(file, source, file_path);
// Short name -> (fqn, depth). When several functions share a short name, the
// one with the strictly greater depth wins; on a tie the first one seen stays.
let mut name_to_fqn_depth: HashMap<String, (String, usize)> = HashMap::new();
for f in &functions {
// The logical name is whatever follows the first "::" of the FQN, or the
// whole FQN when there is no "::".
let logical = f
.fqn
.split_once("::")
.map(|(_, l)| l)
.unwrap_or(f.fqn.as_str());
let (short, depth) = non_java_short_name_and_depth(file.language, logical);
name_to_fqn_depth
.entry(short)
.and_modify(|(existing_fqn, existing_depth)| {
if depth > *existing_depth {
*existing_fqn = f.fqn.clone();
*existing_depth = depth;
}
})
.or_insert_with(|| (f.fqn.clone(), depth));
}
// The depth was only needed to resolve collisions; drop it for lookups.
let name_to_fqn: HashMap<String, String> = name_to_fqn_depth
.into_iter()
.map(|(k, (v, _))| (k, v))
.collect();
// One MERGE statement per function. Because the statement starts with a
// MATCH on the File node, nothing is written if that node does not exist.
for func in &functions {
let code_bytes = code_bytes_for_span(
compressor,
source,
function_spans.get(&func.fqn).copied(),
file.language,
)
.await;
let q = query(
"
MATCH (f:File { path: $path })
MERGE (fn:Function { fqn: $fn_fqn })
ON CREATE SET fn.name = $fn_name,
fn.path = $path,
fn.project_name = $project_name,
fn.language = $language,
fn.code_bytes = $code_bytes
ON MATCH SET fn.name = $fn_name,
fn.path = $path,
fn.project_name = $project_name,
fn.language = $language,
fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
MERGE (f)-[:DECLARES_FUNCTION]->(fn)
",
)
.param("path", file_path.to_string())
.param("fn_fqn", func.fqn.clone())
.param("fn_name", func.name.clone())
.param("project_name", project_name.clone())
.param("language", language.clone())
.param(props::CODE_BYTES, code_bytes);
graph.run(q).await?;
}
// Language-specific import resolution and intra-file call extraction.
match file.language {
LanguageId::Python => {
for imp in extract_python_import_modules(&file.tree, source) {
if let Some(dep) = resolve_python_import_to_known_file(&imp, known_paths) {
let dep_query = query(
"
MERGE (src:File { path: $src_path })
MERGE (dst:File { path: $dst_path })
MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
",
)
.param("src_path", file_path.to_string())
.param("dst_path", dep.clone());
graph.run(dep_query).await?;
// Unresolved Python imports are logged only when
// `should_log_unresolved_import` agrees, given the verbose flag and
// the stdlib / common-external classification of the import.
} else if should_log_unresolved_import(
persistence.verbose_imports,
is_python_stdlib_top_level(&imp),
is_python_common_external_top_level(&imp),
) {
println!(
"Python import (unresolved to scanned files): `{}` in {}",
imp, file_path
);
}
}
for (caller, callee) in extract_python_intrafile_calls(
&file.tree,
source,
file_path,
&name_to_fqn,
) {
accumulator.add_calls_function(caller, callee);
}
}
LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
for spec in extract_js_ts_import_specifiers(&file.tree, source) {
if let Some(dep) =
resolve_js_ts_import_to_known_file(&spec, file_path, known_paths)
{
let dep_query = query(
"
MERGE (src:File { path: $src_path })
MERGE (dst:File { path: $dst_path })
MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
",
)
.param("src_path", file_path.to_string())
.param("dst_path", dep.clone());
graph.run(dep_query).await?;
// Unresolved specifiers are only reported in verbose mode.
} else if persistence.verbose_imports {
println!(
"JS/TS import (unresolved to scanned files): `{}` in {}",
spec, file_path
);
}
}
for (caller, callee) in extract_js_ts_intrafile_calls(
&file.tree,
source,
file_path,
file.language,
&name_to_fqn,
) {
accumulator.add_calls_function(caller, callee);
}
}
LanguageId::Rust => {
for use_path in extract_rust_use_paths(&file.tree, source) {
if let Some(dep) =
resolve_rust_use_to_known_file(&use_path, file_path, known_paths)
{
let dep_query = query(
"
MERGE (src:File { path: $src_path })
MERGE (dst:File { path: $dst_path })
MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
",
)
.param("src_path", file_path.to_string())
.param("dst_path", dep.clone());
graph.run(dep_query).await?;
// Unresolved `use` paths are only reported in verbose mode.
} else if persistence.verbose_imports {
println!(
"Rust use (unresolved to scanned files): `{}` in {}",
use_path.join("::"),
file_path
);
}
}
for (caller, callee) in
extract_rust_intrafile_calls(&file.tree, source, file_path, &name_to_fqn)
{
accumulator.add_calls_function(caller, callee);
}
}
// Other languages only get the Function nodes written above.
_ => {}
}
Ok(())
}
/// Collects the project-internal imports declared in a Java source text.
///
/// A line counts as an import when, after trimming, it starts with `import `
/// and ends with `;`. A leading `static ` keyword is dropped. Only targets
/// under the `com.redbus.genai.` package are returned, in source order.
fn extract_internal_java_imports(source: &str) -> Vec<String> {
    const INTERNAL_PREFIX: &str = "com.redbus.genai.";
    let mut imports = Vec::new();
    for raw_line in source.lines() {
        let Some(target) = raw_line
            .trim()
            .strip_prefix("import ")
            .and_then(|rest| rest.strip_suffix(';'))
        else {
            continue;
        };
        let target = target.trim();
        // `import static a.b.C.member;` -> `a.b.C.member`
        let target = match target.strip_prefix("static ") {
            Some(rest) => rest.trim(),
            None => target,
        };
        if target.starts_with(INTERNAL_PREFIX) {
            imports.push(target.to_string());
        }
    }
    imports
}
/// Maps an internal Java import to the path of the file expected to define it.
///
/// Everything in `current_path` before the first `com/redbus/genai/` segment
/// is treated as the source root. The import's dots are turned into `/` and
/// `.java` is appended. The mapping is purely textual: the resulting path is
/// not checked for existence.
///
/// Returns `None` when `current_path` does not contain the package marker.
fn map_import_to_project_path(current_path: &str, import_fqn: &str) -> Option<String> {
    let marker = "com/redbus/genai/";
    let idx = current_path.find(marker)?;
    // `idx` comes from `find`, so it is a valid char boundary of `current_path`.
    let prefix = &current_path[..idx];
    let relative = import_fqn.replace('.', "/") + ".java";
    Some(format!("{prefix}{relative}"))
}
/// Returns the package named by the first usable `package ...;` line.
///
/// A line is usable when, after trimming, it starts with `package `, ends
/// with `;`, and has a non-empty name in between. Lines that fail any of
/// these checks are skipped rather than ending the search.
fn extract_java_package(source: &str) -> Option<String> {
    source.lines().find_map(|raw_line| {
        let declared = raw_line
            .trim()
            .strip_prefix("package ")?
            .strip_suffix(';')?
            .trim();
        if declared.is_empty() {
            None
        } else {
            Some(declared.to_string())
        }
    })
}
/// Text-based fallback that reads the module name from a `-module(Name).` line.
///
/// The first line that, after trimming, starts with `-module(`, ends with `).`
/// and has a non-empty name is used. Surrounding single quotes of a quoted
/// atom are removed so that the result matches what
/// `extract_erlang_module_name_from_tree` produces for the same declaration
/// (`-module('my-mod').` yields `my-mod`, not `'my-mod'`).
///
/// Lines with trailing text after `).` (for example a comment) do not match.
fn extract_erlang_module_name(source: &str) -> Option<String> {
    source.lines().find_map(|line| {
        let inner = line.trim().strip_prefix("-module(")?.strip_suffix(").")?;
        // Same normalisation as the tree-based extractor: trim, then unquote.
        let name = inner.trim().trim_matches('\'');
        if name.is_empty() {
            None
        } else {
            Some(name.to_string())
        }
    })
}
/// Reads the module name from the first `module_attribute` node of the tree.
///
/// The text of the node's `name` field is trimmed and stripped of surrounding
/// single quotes. Attributes whose name is missing, out of range for `source`,
/// or empty after cleaning are ignored and the walk continues.
fn extract_erlang_module_name_from_tree(tree: &Tree, source: &str) -> Option<String> {
    let mut found: Option<String> = None;
    walk_tree(tree.root_node(), |node| {
        // Once a name has been captured, later nodes are ignored.
        if found.is_some() || node.kind() != "module_attribute" {
            return;
        }
        let Some(name_node) = node.child_by_field_name("name") else {
            return;
        };
        let lo = name_node.start_byte() as usize;
        let hi = name_node.end_byte() as usize;
        if lo >= hi || hi > source.len() {
            return;
        }
        let cleaned = source[lo..hi].trim().trim_matches('\'');
        if !cleaned.is_empty() {
            found = Some(cleaned.to_string());
        }
    });
    found
}
/// Reports whether `path` has the `hrl` extension, compared ASCII
/// case-insensitively. Paths without a UTF-8 extension yield `false`.
fn is_erlang_header_file(path: &Path) -> bool {
    matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some(ext) if ext.eq_ignore_ascii_case("hrl")
    )
}
/// Determines the module name for an Erlang file.
///
/// Preference order: the name found in the syntax tree, then the name found
/// by the text-based extractor, then a guess from the file path. The path
/// guess is skipped for header files, for which `None` is returned.
fn resolve_erlang_module_name(path: &Path, tree: &Tree, source: &str) -> Option<String> {
    let declared = extract_erlang_module_name_from_tree(tree, source)
        .or_else(|| extract_erlang_module_name(source));
    match declared {
        Some(name) => Some(name),
        None if is_erlang_header_file(path) => None,
        None => Some(guess_erlang_module_name_from_path(
            &path.display().to_string(),
        )),
    }
}
/// Guesses a module name from a file path by taking its file stem.
///
/// Falls back to `"unknown_module"` when the path has no file stem or the
/// stem is not valid UTF-8.
fn guess_erlang_module_name_from_path(path: &str) -> String {
    let stem = Path::new(path).file_stem().and_then(|s| s.to_str());
    match stem {
        Some(name) => name.to_string(),
        None => "unknown_module".to_string(),
    }
}
/// Walks the syntax tree and gathers behaviour-related metadata.
///
/// Node kinds handled:
/// * `behaviour_attribute` - its `name` is added to `behaviour_usages`.
/// * `callback` - one `(name, arity)` entry per `sigs` child is appended to
///   `declared_callbacks`; the arity is the named-child count of the
///   signature's `args`. Duplicates are skipped, first-seen order is kept.
/// * `optional_callbacks_attribute` - each `callbacks` child that parses as a
///   name/arity pair is added to `optional_callbacks`.
/// * `wild_attribute` - `extends_behaviour` adds a parent behaviour;
///   `override_callback` adds overridden `(name, arity)` pairs, read from `fa`
///   nodes under the value or, when there are none, from the value's raw text.
///
/// All other node kinds and attribute names are ignored.
fn extract_erlang_behaviour_metadata_from_tree(tree: &Tree, source: &str) -> ErlangBehaviourMetadata {
    let mut metadata = ErlangBehaviourMetadata::default();
    let mut callbacks_seen: HashSet<(String, u32)> = HashSet::new();
    walk_tree(tree.root_node(), |node| {
        let kind = node.kind();
        if kind == "behaviour_attribute" {
            let used = node
                .child_by_field_name("name")
                .and_then(|name_node| extract_erlang_name(name_node, source));
            if let Some(behaviour) = used {
                metadata.behaviour_usages.insert(behaviour);
            }
        } else if kind == "callback" {
            let Some(callback_name) = node
                .child_by_field_name("fun")
                .and_then(|fun_node| extract_erlang_name(fun_node, source))
            else {
                return;
            };
            let mut cursor = node.walk();
            for signature in node.children_by_field_name("sigs", &mut cursor) {
                let Some(args) = signature.child_by_field_name("args") else {
                    continue;
                };
                let signature_key = (callback_name.clone(), args.named_child_count() as u32);
                // Only the first occurrence of a (name, arity) pair is recorded.
                if callbacks_seen.insert(signature_key.clone()) {
                    metadata.declared_callbacks.push(signature_key);
                }
            }
        } else if kind == "optional_callbacks_attribute" {
            let mut cursor = node.walk();
            for entry in node.children_by_field_name("callbacks", &mut cursor) {
                if let Some(pair) = extract_erlang_fa_pair(entry, source) {
                    metadata.optional_callbacks.insert(pair);
                }
            }
        } else if kind == "wild_attribute" {
            let Some(attribute) = node
                .child_by_field_name("name")
                .and_then(|name_node| extract_wild_attribute_name(name_node, source))
            else {
                return;
            };
            let Some(value) = node.child_by_field_name("value") else {
                return;
            };
            if attribute == "extends_behaviour" {
                if let Some(parent) = extract_single_name_expr(value, source) {
                    metadata.behaviour_extensions.insert(parent);
                }
            } else if attribute == "override_callback" {
                let mut overrides: HashSet<(String, u32)> = HashSet::new();
                walk_tree(value, |descendant| {
                    if descendant.kind() != "fa" {
                        return;
                    }
                    if let Some(pair) = extract_erlang_fa_pair(descendant, source) {
                        overrides.insert(pair);
                    }
                });
                // No structured pairs found: fall back to parsing the raw text.
                if overrides.is_empty() {
                    if let Some(text) = node_text(value, source) {
                        overrides.extend(parse_fa_pairs_from_expr_text(text));
                    }
                }
                metadata.overridden_callbacks.extend(overrides);
            }
        }
    });
    metadata
}
/// Extracts the name of a wild attribute, without a leading `-`.
///
/// The text is taken from the node's `name` field when present, otherwise
/// from the node itself. Returns `None` when the text is not an acceptable
/// Erlang name.
fn extract_wild_attribute_name(attr_node: Node, source: &str) -> Option<String> {
    let target = match attr_node.child_by_field_name("name") {
        Some(inner) => inner,
        None => attr_node,
    };
    let name = extract_erlang_name(target, source)?;
    // Only a single leading dash is removed.
    let without_dash = name.strip_prefix('-').map(str::to_string);
    Some(without_dash.unwrap_or(name))
}
fn extract_erlang_fa_pair(fa_node: Node, source: &str) -> Option<(String, u32)> {
if fa_node.kind() != "fa" {
return None;
}
let fun_node = fa_node.child_by_field_name("fun")?;
let arity_node = fa_node.child_by_field_name("arity")?;
let fun_name = extract_erlang_name(fun_node, source)?;
let raw_arity = node_text(arity_node, source)?;
let arity = parse_erlang_arity(raw_arity)?;
Some((fun_name, arity))
}
/// Parses an arity such as `2` or `/2` into a `u32`.
///
/// Surrounding whitespace and any leading `/` characters are dropped before
/// parsing. Anything that is not a valid `u32` afterwards yields `None`.
fn parse_erlang_arity(raw: &str) -> Option<u32> {
    let digits = raw.trim().trim_start_matches('/');
    digits.parse().ok()
}
/// Reads a single Erlang name from an expression node.
///
/// Any number of enclosing parenthesis pairs is peeled off (trimming after
/// each) before the remaining text is normalised as a name.
fn extract_single_name_expr(node: Node, source: &str) -> Option<String> {
    let mut text = node_text(node, source)?.trim();
    while let Some(inner) = text
        .strip_prefix('(')
        .and_then(|rest| rest.strip_suffix(')'))
    {
        text = inner.trim();
    }
    normalize_erlang_name(text)
}
/// Parses `name/arity` pairs out of raw expression text.
///
/// Enclosing parenthesis pairs are peeled off first, then the text is split
/// on `,`. Entries without a `/`, with an unacceptable name, or with an
/// unparsable arity are skipped.
fn parse_fa_pairs_from_expr_text(expr_text: &str) -> HashSet<(String, u32)> {
    let mut body = expr_text.trim();
    while let Some(inner) = body
        .strip_prefix('(')
        .and_then(|rest| rest.strip_suffix(')'))
    {
        body = inner.trim();
    }
    body.split(',')
        .filter_map(|entry| {
            let (name_part, arity_part) = entry.trim().split_once('/')?;
            let name = normalize_erlang_name(name_part)?;
            let arity = parse_erlang_arity(arity_part)?;
            Some((name, arity))
        })
        .collect()
}
fn extract_erlang_name(node: Node, source: &str) -> Option<String> {
let raw = node_text(node, source)?;
normalize_erlang_name(raw)
}
/// Returns the slice of `source` covered by `node`.
///
/// Yields `None` for an empty or inverted byte range, or when the range ends
/// past the end of `source`.
fn node_text<'a>(node: Node, source: &'a str) -> Option<&'a str> {
    let lo = node.start_byte() as usize;
    let hi = node.end_byte() as usize;
    if lo >= hi || hi > source.len() {
        return None;
    }
    Some(&source[lo..hi])
}
/// Normalises raw text into an Erlang name, or rejects it.
///
/// The text is trimmed and one pair of surrounding single quotes is removed.
/// The result is rejected (`None`) when it is empty, starts with `?`, starts
/// with an ASCII uppercase letter, or contains any whitespace.
fn normalize_erlang_name(raw: &str) -> Option<String> {
    let trimmed = raw.trim();
    let unquoted = trimmed
        .strip_prefix('\'')
        .and_then(|rest| rest.strip_suffix('\''))
        .unwrap_or(trimmed);
    // An empty name has no first character and is rejected here.
    let first = unquoted.chars().next()?;
    let rejected = first == '?'
        || first.is_ascii_uppercase()
        || unquoted.chars().any(char::is_whitespace);
    if rejected {
        None
    } else {
        Some(unquoted.to_string())
    }
}
/// Returns the built-in `(callback name, arity)` table for a behaviour name.
///
/// Behaviours that are not listed here yield an empty slice.
fn known_behaviour_callbacks(behaviour: &str) -> &'static [(&'static str, u32)] {
    const GEN_SERVER: &[(&str, u32)] = &[
        ("init", 1),
        ("handle_call", 3),
        ("handle_cast", 2),
        ("handle_info", 2),
        ("terminate", 2),
        ("code_change", 3),
        ("format_status", 2),
    ];
    const SUPERVISOR: &[(&str, u32)] = &[("init", 1)];
    const GEN_STATEM: &[(&str, u32)] = &[
        ("init", 1),
        ("callback_mode", 0),
        ("state_name", 3),
        ("state_name", 2),
        ("terminate", 3),
        ("code_change", 4),
        ("format_status", 2),
    ];
    const COWBOY_HANDLER: &[(&str, u32)] = &[("init", 2)];
    const COWBOY_LOOP: &[(&str, u32)] = &[("init", 2), ("info", 3), ("terminate", 3)];
    const COWBOY_WEBSOCKET: &[(&str, u32)] = &[
        ("init", 2),
        ("websocket_init", 1),
        ("websocket_handle", 2),
        ("websocket_info", 2),
        ("terminate", 3),
    ];
    const COWBOY_REST: &[(&str, u32)] = &[
        ("init", 2),
        ("allowed_methods", 2),
        ("content_types_provided", 2),
        ("content_types_accepted", 2),
        ("resource_exists", 2),
        ("is_authorized", 2),
        ("forbidden", 2),
        ("malformed_request", 2),
        ("delete_resource", 2),
        ("generate_etag", 2),
        ("last_modified", 2),
    ];
    const NONE: &[(&str, u32)] = &[];

    match behaviour {
        "gen_server" => GEN_SERVER,
        "supervisor" => SUPERVISOR,
        "gen_statem" => GEN_STATEM,
        "cowboy_handler" => COWBOY_HANDLER,
        "cowboy_loop" => COWBOY_LOOP,
        "cowboy_websocket" => COWBOY_WEBSOCKET,
        "cowboy_rest" => COWBOY_REST,
        _ => NONE,
    }
}
/// Builds the de-duplicated set of callback contracts relevant to one module.
///
/// Two sources are merged:
/// * callbacks declared by the module itself (only when `module_name` is
///   known), flagged optional when present in `optional_callbacks`;
/// * the built-in callback tables of every behaviour in `behaviour_usages`,
///   always flagged as non-optional.
///
/// The result comes out of a `HashSet`, so its order is unspecified.
fn collect_callback_contracts_for_module(
    module_name: Option<&str>,
    behaviour_usages: &HashSet<String>,
    declared_callbacks: &[(String, u32)],
    optional_callbacks: &HashSet<(String, u32)>,
) -> Vec<CallbackContract> {
    let declared = module_name.into_iter().flat_map(move |module| {
        declared_callbacks
            .iter()
            .map(move |(name, arity)| CallbackContract {
                behaviour: module.to_string(),
                name: name.clone(),
                arity: *arity,
                optional: optional_callbacks.contains(&(name.clone(), *arity)),
            })
    });
    let well_known = behaviour_usages.iter().flat_map(|behaviour| {
        known_behaviour_callbacks(behaviour)
            .iter()
            .map(move |(name, arity)| CallbackContract {
                behaviour: behaviour.clone(),
                name: (*name).to_string(),
                arity: *arity,
                optional: false,
            })
    });
    // Declared callbacks are inserted first, exactly as before.
    let unique: HashSet<CallbackContract> = declared.chain(well_known).collect();
    unique.into_iter().collect()
}
/// Lists the `module:name/arity` identifiers of the callbacks that
/// `handler_module` both declares a signature for and implements.
///
/// Returns an empty vector when the module is not in `erlang_module_index`.
/// The result is sorted and free of duplicates.
fn select_endpoint_handler_fqns(
    handler_module: &str,
    erlang_module_index: &HashMap<String, ErlangModuleSnapshot>,
) -> Vec<String> {
    let snapshot = match erlang_module_index.get(handler_module) {
        Some(snapshot) => snapshot,
        None => return Vec::new(),
    };
    let mut fqns: Vec<String> = Vec::new();
    for sig in snapshot.callback_signatures.iter() {
        if !snapshot.implemented_signatures.contains(sig) {
            continue;
        }
        let (name, arity) = sig;
        fqns.push(format!("{handler_module}:{name}/{arity}"));
    }
    fqns.sort();
    fqns.dedup();
    fqns
}
/// Collects the top-level functions of an Erlang module.
///
/// Every top-level `function_clause` contributes a `(name, arity, fqn)` triple
/// where the FQN is `module:name/arity`. Further clauses of the same
/// name/arity are ignored, so each function appears once, in first-seen order.
fn extract_erlang_functions(
    module_name: &str,
    tree: &Tree,
    source: &str,
) -> Vec<(String, u32, String)> {
    let mut already_emitted: HashSet<(String, u32)> = HashSet::new();
    let mut functions: Vec<(String, u32, String)> = Vec::new();
    walk_tree(tree.root_node(), |node| {
        if node.kind() != "function_clause" || !is_top_level_erlang_function_clause(node) {
            return;
        }
        let signature = node
            .child_by_field_name("name")
            .and_then(|name_node| extract_erlang_name(name_node, source))
            .zip(node.child_by_field_name("args"));
        let Some((fun_name, args_node)) = signature else {
            return;
        };
        // Arity is the number of named children of the argument list.
        let arity = args_node.named_child_count() as u32;
        if !already_emitted.insert((fun_name.clone(), arity)) {
            return;
        }
        let fqn = format!("{module_name}:{fun_name}/{arity}");
        functions.push((fun_name, arity, fqn));
    });
    functions
}
/// Tells whether a `function_clause` node belongs to a module-level function.
///
/// Walks up the ancestors: reaching a `fun_expr` first means the clause is
/// part of an anonymous fun (`false`); reaching `source_file` first means it
/// is top-level (`true`). A node with neither ancestor yields `false`.
fn is_top_level_erlang_function_clause(node: Node) -> bool {
    let mut ancestor = node.parent();
    while let Some(current) = ancestor {
        let kind = current.kind();
        if kind == "fun_expr" {
            return false;
        }
        if kind == "source_file" {
            return true;
        }
        ancestor = current.parent();
    }
    false
}
/// Extracts local (same-module) call edges as `(caller_fqn, callee_fqn)` pairs.
///
/// For every `call` node, the enclosing function clause identifies the caller
/// and the call expression plus argument count identify the callee; both are
/// resolved through `function_by_sig`. Remote calls, unresolved names and
/// self-calls are dropped, and both FQNs must start with `module_name`.
/// Returns an empty set when `module_name` is `None`.
fn extract_erlang_call_edges(
    tree: &Tree,
    source: &str,
    module_name: Option<&str>,
    function_by_sig: &HashMap<(String, u32), String>,
) -> HashSet<(String, String)> {
    let module_name = match module_name {
        Some(name) => name,
        None => return HashSet::new(),
    };
    let mut edges = HashSet::new();
    walk_tree(tree.root_node(), |node| {
        if node.kind() != "call" {
            return;
        }
        // Resolve both ends of the edge; any missing piece aborts this call.
        let resolved = (|| {
            let caller_clause = find_enclosing_function_clause(node)?;
            let caller_name =
                extract_erlang_name(caller_clause.child_by_field_name("name")?, source)?;
            let caller_arity =
                caller_clause.child_by_field_name("args")?.named_child_count() as u32;
            let caller_fqn = function_by_sig.get(&(caller_name, caller_arity))?;

            let call_expr = node.child_by_field_name("expr")?;
            let callee_arity = node.child_by_field_name("args")?.named_child_count() as u32;
            let callee_name = extract_local_call_name(call_expr, source)?;
            let callee_fqn = function_by_sig.get(&(callee_name, callee_arity))?;
            Some((caller_fqn, callee_fqn))
        })();
        let Some((caller_fqn, callee_fqn)) = resolved else {
            return;
        };
        let is_self_call = caller_fqn == callee_fqn;
        if !is_self_call
            && caller_fqn.starts_with(module_name)
            && callee_fqn.starts_with(module_name)
        {
            edges.insert((caller_fqn.clone(), callee_fqn.clone()));
        }
    });
    edges
}
/// Finds the nearest `function_clause` ancestor of `node`.
///
/// The search stops with `None` at `source_file` or when the ancestors run out.
fn find_enclosing_function_clause(node: Node) -> Option<Node> {
    let mut ancestor = node.parent()?;
    loop {
        match ancestor.kind() {
            "function_clause" => return Some(ancestor),
            "source_file" => return None,
            _ => ancestor = ancestor.parent()?,
        }
    }
}
/// Returns the callee name of a call expression unless it is a `remote`
/// (module-qualified) call, in which case `None` is returned.
///
/// Every other node kind is handed to `extract_erlang_name`.
fn extract_local_call_name(expr_node: Node, source: &str) -> Option<String> {
    if expr_node.kind() == "remote" {
        return None;
    }
    extract_erlang_name(expr_node, source)
}
/// Scans tuples for `{"/path", handler, ...}` shapes and reports them as
/// endpoints.
///
/// A tuple qualifies when its first `expr` field is a string literal whose
/// normalized form starts with `/` and its second `expr` field is a valid
/// Erlang name. Each result is `(["ANY"], path, handler)`; duplicate
/// `(path, handler)` pairs are reported once, in first-seen order.
fn extract_erlang_api_endpoints(
    tree: &Tree,
    source: &str,
) -> Vec<(Vec<String>, String, String)> {
    let mut endpoints = Vec::new();
    let mut reported: HashSet<(String, String)> = HashSet::new();
    walk_tree(tree.root_node(), |node| {
        if node.kind() != "tuple" {
            return;
        }
        let mut cursor = node.walk();
        let mut exprs = node.children_by_field_name("expr", &mut cursor);
        // Only the first two elements matter; shorter tuples are skipped.
        let (Some(path_node), Some(handler_node)) = (exprs.next(), exprs.next()) else {
            return;
        };
        let Some(path_raw) = extract_erlang_string_literal(path_node, source) else {
            return;
        };
        let path = normalize_api_path(&path_raw);
        if !path.starts_with('/') {
            return;
        }
        let Some(handler_name) = extract_erlang_name(handler_node, source) else {
            return;
        };
        if reported.insert((path.clone(), handler_name.clone())) {
            endpoints.push((vec!["ANY".to_string()], path, handler_name));
        }
    });
    endpoints
}
/// Collects every distinct `http://` / `https://` URL that appears in `source`.
///
/// The scan is purely textual and line by line. A URL ends at the first `"`,
/// `'`, `)` or whitespace character, or at the end of the line. The result is
/// de-duplicated and sorted so the output is deterministic.
fn extract_external_http_urls(source: &str) -> Vec<String> {
    let mut urls = HashSet::new();
    for line in source.lines() {
        let mut rest = line;
        loop {
            // Take whichever scheme occurs first; preferring `http://`
            // unconditionally would skip an earlier `https://` URL.
            let start = match (rest.find("http://"), rest.find("https://")) {
                (Some(plain), Some(secure)) => plain.min(secure),
                (Some(only), None) | (None, Some(only)) => only,
                (None, None) => break,
            };
            let after = &rest[start..];
            let end = after
                .find(|c: char| c == '"' || c.is_whitespace() || c == '\'' || c == ')')
                .unwrap_or(after.len());
            let url = &after[..end];
            if !url.is_empty() {
                urls.insert(url.to_string());
            }
            // `end` is at least the scheme length, so the scan always advances.
            rest = &after[end..];
        }
    }
    let mut out: Vec<String> = urls.into_iter().collect();
    out.sort_unstable();
    out
}
/// Collects the distinct string literals in the tree whose content starts
/// with `http://` or `https://`.
///
/// The result order is unspecified (it comes out of a `HashSet`).
fn extract_external_http_urls_from_tree(tree: &Tree, source: &str) -> Vec<String> {
    let mut found: HashSet<String> = HashSet::new();
    walk_tree(tree.root_node(), |node| {
        if let Some(literal) = extract_erlang_string_literal(node, source) {
            let is_http = ["http://", "https://"]
                .iter()
                .any(|scheme| literal.starts_with(*scheme));
            if is_http {
                found.insert(literal);
            }
        }
    });
    found.into_iter().collect()
}
/// Finds `http://` / `https://` URLs inside `source[span_start..span_end]`.
///
/// Each hit is returned as `(url, start, end)` with absolute byte offsets into
/// `source`, in order of appearance. A URL ends at the first `"`, `'`, `)` or
/// whitespace character, or at the end of the span. An empty, inverted or
/// out-of-bounds span yields no results, as does a span that does not fall on
/// character boundaries.
fn scan_http_urls_in_span(source: &str, span_start: usize, span_end: usize) -> Vec<(String, usize, usize)> {
    let mut out = Vec::new();
    if span_start >= span_end || span_end > source.len() {
        return out;
    }
    let mut rest_start = span_start;
    while rest_start < span_end {
        let Some(slice) = source.get(rest_start..span_end) else {
            break;
        };
        // Take whichever scheme occurs first; preferring `http://`
        // unconditionally would skip an earlier `https://` URL.
        let rel = match (slice.find("http://"), slice.find("https://")) {
            (Some(plain), Some(secure)) => plain.min(secure),
            (Some(only), None) | (None, Some(only)) => only,
            (None, None) => break,
        };
        let abs_start = rest_start + rel;
        let Some(after) = source.get(abs_start..span_end) else {
            break;
        };
        let end_rel = after
            .find(|c: char| c == '"' || c.is_whitespace() || c == '\'' || c == ')')
            .unwrap_or(after.len());
        let url = after[..end_rel].trim();
        if !url.is_empty() {
            out.push((url.to_string(), abs_start, abs_start + end_rel));
        }
        // Always move forward, even if the match were empty.
        rest_start = abs_start + end_rel.max(1);
    }
    out
}
/// Collects `(url, start, end)` triples for HTTP(S) URLs found inside C#
/// string-literal nodes (regular, verbatim and interpolated text parts).
///
/// Offsets are absolute byte positions in `source`. Results are
/// de-duplicated; their order is unspecified.
fn extract_csharp_external_http_urls_with_spans(tree: &Tree, source: &str) -> Vec<(String, usize, usize)> {
    const STRING_KINDS: [&str; 4] = [
        "string_literal",
        "verbatim_string_literal",
        "interpolated_string_text",
        "interpolated_verbatim_string_text",
    ];
    let mut unique: HashSet<(String, usize, usize)> = HashSet::new();
    walk_tree(tree.root_node(), |node| {
        if STRING_KINDS.contains(&node.kind()) {
            let lo = node.start_byte() as usize;
            let hi = node.end_byte() as usize;
            unique.extend(scan_http_urls_in_span(source, lo, hi));
        }
    });
    unique.into_iter().collect()
}
/// Lists `(fqn, body_start, body_end)` for C# methods, constructors and
/// property accessors that have a body.
///
/// * Methods: `Type.Method`, or `Namespace.Method` / bare `Method` when no
///   enclosing type is found.
/// * Constructors: the FQN produced by `csharp_constructor_symbol_fqn`.
/// * Accessors: `Type.<prefix>_<Property>`.
///
/// Entries appear in tree-walk order; overloads are not merged.
fn csharp_method_body_spans(tree: &Tree, source: &str, namespace: Option<&str>) -> Vec<(String, usize, usize)> {
    let mut out = Vec::new();
    walk_tree(tree.root_node(), |node| {
        // Each arm resolves to the symbol FQN plus the node holding its body.
        let entry = match node.kind() {
            "method_declaration" => (|| {
                let body = node.child_by_field_name("body")?;
                let method_name = csharp_method_simple_name(node, source)?;
                let fqn = if let Some(cf) = csharp_enclosing_type_fqn(node, source, namespace) {
                    format!("{cf}.{method_name}")
                } else {
                    let effective_ns = csharp_enclosing_namespace_prefix(node, source)
                        .or_else(|| namespace.map(|s| s.to_string()));
                    match effective_ns {
                        Some(ns) => format!("{ns}.{method_name}"),
                        None => method_name,
                    }
                };
                Some((fqn, body))
            })(),
            "constructor_declaration" => (|| {
                let cf = csharp_enclosing_type_fqn(node, source, namespace)?;
                let (_, fqn) = csharp_constructor_symbol_fqn(node, &cf, source);
                let body = csharp_block_body(node)?;
                Some((fqn, body))
            })(),
            "accessor_declaration" => (|| {
                let cf = csharp_enclosing_type_fqn(node, source, namespace)?;
                let prop_name = csharp_property_name_for_accessor(node, source)?;
                let prefix = csharp_accessor_kind_prefix(node, source);
                let fqn = format!("{cf}.{}_{}", prefix, prop_name);
                let body = csharp_block_body(node)?;
                Some((fqn, body))
            })(),
            _ => None,
        };
        if let Some((fqn, body)) = entry {
            out.push((fqn, body.start_byte() as usize, body.end_byte() as usize));
        }
    });
    out
}
/// Collects the module names used in remote (`module:function`) references.
///
/// For every `remote` node the `module` field of its `module` field is read
/// and normalized; names that fail normalization are skipped.
fn extract_erlang_called_modules_from_tree(tree: &Tree, source: &str) -> HashSet<String> {
    let mut modules = HashSet::new();
    walk_tree(tree.root_node(), |node| {
        if node.kind() != "remote" {
            return;
        }
        let module_name = node
            .child_by_field_name("module")
            .and_then(|remote_module| remote_module.child_by_field_name("module"))
            .and_then(|module_expr| extract_erlang_name(module_expr, source));
        modules.extend(module_name);
    });
    modules
}
/// Splits a URL into `(protocol, host, path)`.
///
/// * Without `://` the whole input is returned as the host, with path `/` and
///   no protocol.
/// * Otherwise the host runs up to the first `/` or `?`; everything from that
///   character on is the path, or `/` when nothing follows the host.
fn split_url_protocol_host_and_path(url: &str) -> (Option<String>, String, String) {
    let Some((proto, rest)) = url.split_once("://") else {
        return (None, url.to_string(), "/".to_string());
    };
    let (host, path) = match rest.find(|c: char| c == '/' || c == '?') {
        Some(cut) => rest.split_at(cut),
        None => (rest, "/"),
    };
    (Some(proto.to_string()), host.to_string(), path.to_string())
}
/// Reduces a raw path or URL to a canonical API path.
///
/// Steps: trim whitespace; drop `scheme://host` if present (a URL without any
/// path becomes `/`); cut the query string / fragment at the first `?` or
/// `#`; make sure the path starts with `/`; remove one trailing `/` unless
/// the path is just `/`.
fn normalize_api_path(raw: &str) -> String {
    let trimmed = raw.trim();
    let without_origin = match trimmed.split_once("://") {
        Some((_, after_scheme)) => match after_scheme.find('/') {
            Some(slash) => &after_scheme[slash..],
            None => return "/".to_string(),
        },
        None => trimmed,
    };
    let cut = without_origin
        .find(|c: char| c == '?' || c == '#')
        .unwrap_or(without_origin.len());
    let bare = &without_origin[..cut];

    let mut path = String::with_capacity(bare.len() + 1);
    if !bare.starts_with('/') {
        path.push('/');
    }
    path.push_str(bare);
    if path.len() > 1 && path.ends_with('/') {
        path.pop();
    }
    path
}
/// Returns the content of a `string` node without its double quotes.
///
/// The content is everything between the first and the last `"` of the
/// node's trimmed text. Non-`string` nodes, and text with fewer than two
/// quotes, yield `None`. Escape sequences are left untouched.
fn extract_erlang_string_literal(node: Node, source: &str) -> Option<String> {
    if node.kind() != "string" {
        return None;
    }
    let raw = node_text(node, source)?.trim();
    // A text that starts and ends with a quote is just the special case
    // where these positions are the first and last byte.
    let open = raw.find('"')?;
    let close = raw.rfind('"')?;
    (close > open).then(|| raw[open + 1..close].to_string())
}
/// Guesses where `module_name` lives by assuming `<module>.erl` sits next to
/// the file at `current_path`.
///
/// When `current_path` has no parent, `.` is used as the directory. The
/// result is rendered through `path_str_slash`.
fn guess_erlang_file_path_from_module(current_path: &str, module_name: &str) -> String {
    let dir = Path::new(current_path)
        .parent()
        .unwrap_or_else(|| Path::new("."));
    let candidate = dir.join(format!("{module_name}.erl"));
    path_str_slash(&candidate)
}
/// Returns the `(start, end)` byte offsets of `node`.
fn node_byte_span(node: Node) -> (usize, usize) {
    let start = node.start_byte() as usize;
    let end = node.end_byte() as usize;
    (start, end)
}
/// Records `span` for `key`, widening an existing entry instead of replacing
/// it.
///
/// If `key` is already present, its span becomes the smallest range covering
/// both the old and the new span; otherwise `span` is stored as is.
fn insert_widest_span(map: &mut HashMap<String, (usize, usize)>, key: String, span: (usize, usize)) {
    let (lo, hi) = span;
    match map.get_mut(&key) {
        Some(current) => {
            current.0 = current.0.min(lo);
            current.1 = current.1.max(hi);
        }
        None => {
            map.insert(key, span);
        }
    }
}
/// Maps each Java class/interface FQN to the byte span of its declaration.
///
/// The FQN is `package.Name` when a package is given, otherwise just `Name`.
/// Declarations sharing an FQN are merged into the widest covering span.
fn extract_java_class_spans(tree: &Tree, source: &str, package: Option<&str>) -> HashMap<String, (usize, usize)> {
    let mut spans = HashMap::new();
    walk_tree(tree.root_node(), |node| {
        if !matches!(node.kind(), "class_declaration" | "interface_declaration") {
            return;
        }
        if let Some(name) = identifier_text_from_children(node, source) {
            let fqn = match package {
                Some(pkg) => format!("{pkg}.{name}"),
                None => name,
            };
            insert_widest_span(&mut spans, fqn, node_byte_span(node));
        }
    });
    spans
}
fn extract_java_method_body_spans(
tree: &Tree,
source: &str,
package: Option<&str>,
) -> HashMap<String, (usize, usize)> {
let mut out = HashMap::new();
walk_tree(tree.root_node(), |node| {
if node.kind() != "method_declaration" {
return;
}
let Some(method_name) = identifier_text_from_children(node, source) else {
return;
};
let mut parent = node.parent();
let mut class_fqn: Option<String> = None;
while let Some(p) = parent {
let pk = p.kind();
if pk == "class_declaration" || pk == "interface_declaration" {
if let Some(class_name) = identifier_text_from_children(p, source) {
class_fqn = Some(
package
.map(|pkg| format!("{pkg}.{class_name}"))
.unwrap_or(class_name),
);
}
break;
}
parent = p.parent();
}
let fqn = if let Some(ref cls) = class_fqn {
format!("{cls}.{method_name}")
} else if let Some(pkg) = package {
format!("{pkg}.{method_name}")
} else {
method_name.clone()
};
let span = node
.child_by_field_name("body")
.map(node_byte_span)
.unwrap_or_else(|| node_byte_span(node));
insert_widest_span(&mut out, fqn, span);
});
out
}
/// Maps each C# type FQN (class, interface, struct, enum, record, record
/// struct) to the byte span of its declaration.
///
/// FQNs come from `csharp_fqn_for_type_declaration`; declarations sharing an
/// FQN are merged into the widest covering span.
fn extract_csharp_class_spans(tree: &Tree, source: &str) -> HashMap<String, (usize, usize)> {
    let namespace = extract_csharp_namespace(tree, source);
    let mut spans = HashMap::new();
    walk_tree(tree.root_node(), |node| {
        let is_type_declaration = matches!(
            node.kind(),
            "class_declaration"
                | "interface_declaration"
                | "struct_declaration"
                | "enum_declaration"
                | "record_declaration"
                | "record_struct_declaration"
        );
        if !is_type_declaration {
            return;
        }
        if let Some(fqn) = csharp_fqn_for_type_declaration(node, source, namespace.as_deref()) {
            insert_widest_span(&mut spans, fqn, node_byte_span(node));
        }
    });
    spans
}
/// Maps each C# property FQN (`Type.Property`) to the byte span of its
/// declaration.
///
/// Properties without an enclosing type or without a non-empty name are
/// skipped. Entries sharing an FQN are merged into the widest covering span.
fn extract_csharp_property_spans(tree: &Tree, source: &str) -> HashMap<String, (usize, usize)> {
    let namespace = extract_csharp_namespace(tree, source);
    let mut spans = HashMap::new();
    walk_tree(tree.root_node(), |node| {
        if node.kind() != "property_declaration" {
            return;
        }
        let fqn = (|| {
            let class_fqn = csharp_enclosing_type_fqn(node, source, namespace.as_deref())?;
            let name_node = node.child_by_field_name("name")?;
            let prop_name = csharp_node_text(name_node, source).filter(|s| !s.is_empty())?;
            Some(format!("{class_fqn}.{prop_name}"))
        })();
        if let Some(fqn) = fqn {
            insert_widest_span(&mut spans, fqn, node_byte_span(node));
        }
    });
    spans
}
/// Converts the output of `csharp_method_body_spans` into an FQN-keyed map.
///
/// When several entries share an FQN, the last one in the list wins.
fn extract_csharp_method_body_spans_map(
    tree: &Tree,
    source: &str,
    namespace: Option<&str>,
) -> HashMap<String, (usize, usize)> {
    let mut spans = HashMap::new();
    for (fqn, lo, hi) in csharp_method_body_spans(tree, source, namespace) {
        spans.insert(fqn, (lo, hi));
    }
    spans
}
/// Maps each top-level Erlang function (`module:name/arity`) to a byte span.
///
/// All clauses of the same function are merged, so the span runs from the
/// start of the first clause to the end of the last.
fn extract_erlang_function_spans(
    module_name: &str,
    tree: &Tree,
    source: &str,
) -> HashMap<String, (usize, usize)> {
    let mut spans = HashMap::new();
    walk_tree(tree.root_node(), |node| {
        if node.kind() != "function_clause" || !is_top_level_erlang_function_clause(node) {
            return;
        }
        let signature = node
            .child_by_field_name("name")
            .and_then(|name_node| extract_erlang_name(name_node, source))
            .zip(node.child_by_field_name("args"));
        if let Some((fun_name, args_node)) = signature {
            let arity = args_node.named_child_count() as u32;
            let fqn = format!("{module_name}:{fun_name}/{arity}");
            insert_widest_span(&mut spans, fqn, node_byte_span(node));
        }
    });
    spans
}
/// Maps each Go struct/interface type (`[package.]Name`) to the byte span of
/// its `type_spec`.
///
/// Only `type_spec` children of `type_declaration` nodes whose `type` field
/// is a `struct_type` or `interface_type` are considered. Entries sharing an
/// FQN are merged into the widest covering span.
fn extract_go_class_spans(
    tree: &Tree,
    source: &str,
    package: Option<&str>,
) -> HashMap<String, (usize, usize)> {
    let mut spans = HashMap::new();
    walk_tree(tree.root_node(), |node| {
        if node.kind() != "type_declaration" {
            return;
        }
        for spec in (0..node.child_count()).filter_map(|i| node.child(i)) {
            if spec.kind() != "type_spec" {
                continue;
            }
            let is_struct_or_interface = spec
                .child_by_field_name("type")
                .map_or(false, |t| matches!(t.kind(), "struct_type" | "interface_type"));
            if !is_struct_or_interface {
                continue;
            }
            let Some(name_node) = spec.child_by_field_name("name") else {
                continue;
            };
            let start = name_node.start_byte() as usize;
            let end = (name_node.end_byte() as usize).min(source.len());
            if start >= end {
                continue;
            }
            let name = &source[start..end];
            let fqn = match package {
                Some(pkg) => format!("{pkg}.{name}"),
                None => name.to_string(),
            };
            insert_widest_span(&mut spans, fqn, node_byte_span(spec));
        }
    });
    spans
}
/// Maps each Go function or method FQN to the byte span of its body.
///
/// FQNs come from the `go_decl_fqn_from_*` helpers; declarations they cannot
/// name are skipped. Without a `body` field the span of the whole declaration
/// is used. Entries sharing an FQN are merged into the widest covering span.
fn extract_go_function_body_spans(
    tree: &Tree,
    source: &str,
    package: Option<&str>,
) -> HashMap<String, (usize, usize)> {
    let mut spans = HashMap::new();
    walk_tree(tree.root_node(), |node| {
        let resolved = match node.kind() {
            "function_declaration" => go_decl_fqn_from_function_declaration(node, source, package),
            "method_declaration" => go_decl_fqn_from_method_declaration(node, source, package),
            _ => return,
        };
        let Some(fqn) = resolved else {
            return;
        };
        let span = match node.child_by_field_name("body") {
            Some(body) => node_byte_span(body),
            None => node_byte_span(node),
        };
        insert_widest_span(&mut spans, fqn, span);
    });
    spans
}
fn extract_non_java_function_body_spans(
file: &ParsedFile,
source: &str,
file_path: &str,
) -> HashMap<String, (usize, usize)> {
let mut out = HashMap::new();
match file.language {
LanguageId::Python => {
walk_tree(file.tree.root_node(), |node| {
if node.kind() != "function_definition" || python_node_inside_class(node) {
return;
}
let Some(logical) = python_function_logical_name(node, source) else {
return;
};
let fqn = non_java_file_scoped_fqn(file_path, &logical);
let span = node
.child_by_field_name("body")
.map(node_byte_span)
.unwrap_or_else(|| node_byte_span(node));
insert_widest_span(&mut out, fqn, span);
});
}
LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
walk_tree(file.tree.root_node(), |node| {
if node.kind() != "function_declaration" {
return;
}
let Some(name) = js_ts_function_name(node, source) else {
return;
};
let fqn = non_java_file_scoped_fqn(file_path, &name);
let span = node
.child_by_field_name("body")
.map(node_byte_span)
.unwrap_or_else(|| node_byte_span(node));
insert_widest_span(&mut out, fqn, span);
});
}
LanguageId::Rust => {
walk_tree(file.tree.root_node(), |node| {
if node.kind() != "function_item" || rust_inside_impl(node) {
return;
}
let Some(logical) = rust_function_logical_name(node, source) else {
return;
};
let fqn = non_java_file_scoped_fqn(file_path, &logical);
let span = node
.child_by_field_name("body")
.map(node_byte_span)
.unwrap_or_else(|| node_byte_span(node));
insert_widest_span(&mut out, fqn, span);
});
}
_ => {}
}
out
}
/// Returns the trimmed text of a JS/TS function's `name` field, or `None`
/// when the field is missing or its text is empty.
fn js_ts_function_name(node: Node, source: &str) -> Option<String> {
    let name_node = node.child_by_field_name("name")?;
    let name = node_text_slice(name_node, source)?.trim().to_string();
    if name.is_empty() {
        None
    } else {
        Some(name)
    }
}
/// Returns the trimmed text of a Rust function item's `name` field, or `None`
/// when the field is missing or its text is empty.
fn rust_function_name(node: Node, source: &str) -> Option<String> {
    let name_node = node.child_by_field_name("name")?;
    let name = node_text_slice(name_node, source)?.trim().to_string();
    (!name.is_empty()).then_some(name)
}
/// Compresses the snippet identified by `span` via `compress_snippet`.
///
/// Returns `None` without doing any work when no compressor client is
/// supplied; otherwise returns whatever `compress_snippet` yields.
async fn code_bytes_for_span(
    compressor: Option<&CompressorClient>,
    source: &str,
    span: Option<(usize, usize)>,
    language: LanguageId,
) -> Option<Vec<u8>> {
    match compressor {
        Some(client) => compress_snippet(source, span, language, client).await,
        None => None,
    }
}
/// Visits `root` and all of its descendants in pre-order (a node before its
/// children, children left to right), calling `f` on each.
///
/// Uses an explicit stack rather than recursion.
fn walk_tree(root: Node, mut f: impl FnMut(Node)) {
    let mut pending = vec![root];
    while let Some(current) = pending.pop() {
        f(current);
        // Push children right-to-left so the leftmost child is popped first.
        pending.extend(
            (0..current.child_count())
                .rev()
                .filter_map(|idx| current.child(idx)),
        );
    }
}
/// Pre-order traversal like `walk_tree`, except that the children of
/// `local_function_statement` nodes are not visited.
///
/// The `local_function_statement` node itself is still passed to `f`.
fn walk_tree_skip_local_functions(root: Node, mut f: impl FnMut(Node)) {
    let mut pending = vec![root];
    while let Some(current) = pending.pop() {
        f(current);
        if current.kind() != "local_function_statement" {
            // Push children right-to-left so the leftmost child is popped first.
            pending.extend(
                (0..current.child_count())
                    .rev()
                    .filter_map(|idx| current.child(idx)),
            );
        }
    }
}
fn identifier_text_from_children(node: Node, source: &str) -> Option<String> {
let child_count = node.child_count();
for i in 0..child_count {
if let Some(child) = node.child(i) {
let kind = child.kind();
if kind == "identifier" || kind == "name" {
let start = child.start_byte() as usize;
let end = child.end_byte() as usize;
if end <= source.len() && start < end {
return Some(source[start..end].to_string());
}
}
}
}
None
}
fn extract_java_symbols(
tree: &Tree,
source: &str,
package: Option<&str>,
) -> (Vec<ClassSymbol>, Vec<FunctionSymbol>) {
let mut classes: Vec<ClassSymbol> = Vec::new();
let mut methods: Vec<FunctionSymbol> = Vec::new();
let mut seen_class_fqns: HashSet<String> = HashSet::new();
let root = tree.root_node();
walk_tree(root, |node| {
let kind = node.kind();
match kind {
"class_declaration" | "interface_declaration" => {
if let Some(name) = identifier_text_from_children(node, source) {
let fqn = if let Some(pkg) = package {
format!("{pkg}.{name}")
} else {
name.clone()
};
if seen_class_fqns.insert(fqn.clone()) {
classes.push(ClassSymbol {
name,
fqn,
kind: None,
});
}
}
}
"method_declaration" => {
if let Some(method_name) = identifier_text_from_children(node, source) {
let mut parent = node.parent();
let mut class_fqn: Option<String> = None;
while let Some(p) = parent {
let pk = p.kind();
if pk == "class_declaration" || pk == "interface_declaration" {
if let Some(class_name) = identifier_text_from_children(p, source) {
let full = if let Some(pkg) = package {
format!("{pkg}.{class_name}")
} else {
class_name
};
class_fqn = Some(full);
}
break;
}
parent = p.parent();
}
let fqn = if let Some(ref cls) = class_fqn {
format!("{cls}.{}", method_name)
} else if let Some(pkg) = package {
format!("{pkg}.{}", method_name)
} else {
method_name.clone()
};
let (return_type, param_types, param_count) =
extract_java_method_signature_types(node, source);
methods.push(FunctionSymbol {
name: method_name,
fqn,
class_fqn,
return_type,
param_types,
param_count,
modifiers: Vec::new(),
is_pointer_receiver: None,
});
}
}
_ => {}
}
});
(classes, methods)
}
/// Reports every `ERROR` or missing node as `(line, column, snippet)`.
///
/// Line and column are 1-based. The snippet is at most the first 120
/// characters of the node's text with newlines replaced by spaces, or empty
/// when the node covers no text.
fn extract_java_parse_warnings(tree: &Tree, source: &str) -> Vec<(usize, usize, String)> {
    let mut warnings = Vec::new();
    walk_tree(tree.root_node(), |node| {
        let is_problem = node.kind() == "ERROR" || node.is_missing();
        if !is_problem {
            return;
        }
        let start = node.start_byte() as usize;
        let end = (node.end_byte() as usize).min(source.len());
        let mut snippet = String::new();
        if start < end {
            snippet.extend(
                source[start..end]
                    .chars()
                    .take(120)
                    .map(|c| if c == '\n' { ' ' } else { c }),
            );
        }
        let at = node.start_position();
        warnings.push((at.row + 1, at.column + 1, snippet));
    });
    warnings
}
/// Reduces a Java type expression to its raw type token.
///
/// Everything from the first `<` on is dropped, and of what remains the last
/// whitespace-separated word is returned (so leading modifiers or annotations
/// are discarded). If nothing is left before the `<`, the whole trimmed input
/// is returned unchanged.
fn java_simple_type_name(type_text: &str) -> String {
    let trimmed = type_text.trim();
    let raw_type = match trimmed.find('<') {
        Some(generic_start) => &trimmed[..generic_start],
        None => trimmed,
    };
    match raw_type.split_whitespace().next_back() {
        Some(word) => word.to_owned(),
        None => trimmed.to_owned(),
    }
}
/// Returns the trimmed source text of a type node.
///
/// The node's end offset is clamped to `source.len()`; an empty range yields
/// `None`.
fn java_type_node_display(node: Node, source: &str) -> Option<String> {
    let start = node.start_byte() as usize;
    let end = (node.end_byte() as usize).min(source.len());
    (start < end).then(|| source[start..end].trim().to_string())
}
fn extract_java_method_signature_types(
method: Node,
source: &str,
) -> (Option<String>, Vec<String>, usize) {
let return_type = method
.child_by_field_name("type")
.and_then(|n| java_type_node_display(n, source))
.map(|full| java_simple_type_name(&full));
let mut param_types: Vec<String> = Vec::new();
if let Some(params) = method.child_by_field_name("parameters") {
let mut i = 0usize;
while let Some(child) = params.child(i) {
i += 1;
if child.kind() == "formal_parameter" || child.kind() == "spread_parameter" {
if let Some(tn) = child.child_by_field_name("type") {
if let Some(full) = java_type_node_display(tn, source) {
param_types.push(java_simple_type_name(&full));
}
}
}
}
}
let param_count = param_types.len();
(return_type, param_types, param_count)
}
fn extract_java_inheritance_edges(
tree: &Tree,
source: &str,
package: Option<&str>,
) -> Vec<(String, String)> {
let mut edges = Vec::new();
let import_map = build_internal_import_map(source);
let root = tree.root_node();
walk_tree(root, |node| {
if node.kind() != "class_declaration" {
return;
}
let Some(class_name) = identifier_text_from_children(node, source) else {
return;
};
let child_fqn = match package {
Some(pkg) => format!("{pkg}.{class_name}"),
None => class_name,
};
let resolve = |type_node: Node| {
java_resolve_type_fqn(type_node, source, package, &import_map)
};
if let Some(super_c) = node.child_by_field_name("superclass") {
if let Some(p) = resolve(super_c) {
edges.push((child_fqn.clone(), p));
}
}
if let Some(ifs) = node.child_by_field_name("interfaces") {
let mut i = 0usize;
while let Some(ch) = ifs.child(i) {
i += 1;
if ch.kind() == "type_list" {
let mut j = 0usize;
while let Some(t) = ch.child(j) {
j += 1;
if let Some(p) = resolve(t) {
edges.push((child_fqn.clone(), p));
}
}
}
}
}
});
edges
}
/// Resolves a Java type node to a fully qualified name.
///
/// Resolution order:
/// 1. A `scoped_type_identifier` containing a `.` is returned verbatim.
/// 2. Otherwise a simple name is taken from the node text (primitive type
///    kinds), its `name` field, or the first `type_identifier` found beneath
///    it; with no name available the result is `None`.
/// 3. Names accepted by `is_java_primitive_or_builtin` yield `None`.
/// 4. The name is looked up in `import_map`; if absent it is qualified with
///    `package`, or returned bare when there is no package.
fn java_resolve_type_fqn(
    type_node: Node,
    source: &str,
    package: Option<&str>,
    import_map: &HashMap<String, String>,
) -> Option<String> {
    /// Text of `n` with the end clamped to the source length; `None` if empty.
    fn span_text<'s>(n: Node, source: &'s str) -> Option<&'s str> {
        let start = n.start_byte() as usize;
        let end = (n.end_byte() as usize).min(source.len());
        if start < end {
            Some(&source[start..end])
        } else {
            None
        }
    }

    let kind = type_node.kind();
    if kind == "scoped_type_identifier" {
        if let Some(text) = span_text(type_node, source).map(str::trim) {
            if text.contains('.') {
                return Some(text.to_string());
            }
        }
    }

    let is_primitive_kind = matches!(
        kind,
        "integral_type" | "floating_point_type" | "boolean_type" | "void_type"
    );
    let simple: String = if is_primitive_kind {
        span_text(type_node, source)?.trim().to_string()
    } else {
        let named = type_node
            .child_by_field_name("name")
            .and_then(|name_node| span_text(name_node, source));
        match named {
            Some(text) => text.to_string(),
            None => {
                // Fall back to the first `type_identifier` in pre-order.
                let mut found: Option<String> = None;
                walk_tree(type_node, |n| {
                    if found.is_none() && n.kind() == "type_identifier" {
                        found = span_text(n, source).map(str::to_string);
                    }
                });
                found?
            }
        }
    };

    if is_java_primitive_or_builtin(&simple) {
        return None;
    }
    let resolved = match import_map.get(&simple) {
        Some(imported) => imported.clone(),
        None => match package {
            Some(pkg) => format!("{pkg}.{simple}"),
            None => simple,
        },
    };
    Some(resolved)
}
/// Lists `(class_fqn, annotation names)` for every Java class or interface
/// that carries at least one annotation.
///
/// Annotation names are those returned by `java_modifiers_annotations`.
fn extract_java_class_annotations(
    tree: &Tree,
    source: &str,
    package: Option<&str>,
) -> Vec<(String, Vec<String>)> {
    let mut annotated = Vec::new();
    walk_tree(tree.root_node(), |node| {
        if !matches!(node.kind(), "class_declaration" | "interface_declaration") {
            return;
        }
        let Some(class_name) = identifier_text_from_children(node, source) else {
            return;
        };
        let class_fqn = if let Some(pkg) = package {
            format!("{pkg}.{class_name}")
        } else {
            class_name
        };
        let annotations = java_modifiers_annotations(node, source);
        if annotations.is_empty() {
            return;
        }
        annotated.push((class_fqn, annotations));
    });
    annotated
}
fn extract_java_method_annotations(
tree: &Tree,
source: &str,
package: Option<&str>,
) -> Vec<(String, Vec<String>)> {
let mut out = Vec::new();
let root = tree.root_node();
walk_tree(root, |node| {
if node.kind() != "method_declaration" {
return;
}
let Some(method_name) = identifier_text_from_children(node, source) else {
return;
};
let mut parent = node.parent();
let mut class_fqn: Option<String> = None;
while let Some(p) = parent {
let pk = p.kind();
if pk == "class_declaration" || pk == "interface_declaration" {
if let Some(class_name) = identifier_text_from_children(p, source) {
class_fqn = Some(match package {
Some(pkg) => format!("{pkg}.{class_name}"),
None => class_name,
});
}
break;
}
parent = p.parent();
}
let fqn = match &class_fqn {
Some(cls) => format!("{cls}.{method_name}"),
None => match package {
Some(pkg) => format!("{pkg}.{method_name}"),
None => method_name.clone(),
},
};
let anns = java_modifiers_annotations(node, source);
if !anns.is_empty() {
out.push((fqn, anns));
}
});
out
}
/// Returns the simple names of the annotations in a declaration's direct
/// `modifiers` children.
///
/// Only annotations whose first named child is an `identifier` or
/// `scoped_identifier` are considered; for qualified names only the part
/// after the last `.` is kept.
fn java_modifiers_annotations(node: Node, source: &str) -> Vec<String> {
    let mut names = Vec::new();
    for outer in 0..node.child_count() {
        let Some(modifiers) = node.child(outer) else {
            break;
        };
        if modifiers.kind() != "modifiers" {
            continue;
        }
        for inner in 0..modifiers.child_count() {
            let Some(annotation) = modifiers.child(inner) else {
                break;
            };
            if !matches!(annotation.kind(), "marker_annotation" | "annotation") {
                continue;
            }
            let Some(name_node) = annotation.named_child(0) else {
                continue;
            };
            if !matches!(name_node.kind(), "identifier" | "scoped_identifier") {
                continue;
            }
            let start = name_node.start_byte() as usize;
            let end = (name_node.end_byte() as usize).min(source.len());
            if start < end {
                let raw = &source[start..end];
                let simple = raw.rsplit('.').next().unwrap_or(raw);
                names.push(simple.to_string());
            }
        }
    }
    names
}
/// Tells whether any `modifiers` node at or beneath `type_node` contains an
/// annotation whose first named child reads `Autowired` or ends with
/// `.Autowired`.
fn java_type_has_autowired(type_node: Node, source: &str) -> bool {
    let mut has_autowired = false;
    walk_tree(type_node, |candidate| {
        if candidate.kind() != "modifiers" {
            return;
        }
        for idx in 0..candidate.child_count() {
            let Some(modifier) = candidate.child(idx) else {
                break;
            };
            if !matches!(modifier.kind(), "marker_annotation" | "annotation") {
                continue;
            }
            let Some(name_node) = modifier.named_child(0) else {
                continue;
            };
            let start = name_node.start_byte() as usize;
            let end = (name_node.end_byte() as usize).min(source.len());
            if start >= end {
                continue;
            }
            let raw = &source[start..end];
            if raw == "Autowired" || raw.ends_with(".Autowired") {
                has_autowired = true;
            }
        }
    });
    has_autowired
}
fn extract_java_injected_dependencies(
tree: &Tree,
source: &str,
package: Option<&str>,
) -> Vec<(String, String)> {
let mut out = Vec::new();
let import_map = build_internal_import_map(source);
let root = tree.root_node();
walk_tree(root, |node| {
if node.kind() != "class_declaration" {
return;
}
let Some(class_name) = identifier_text_from_children(node, source) else {
return;
};
let class_fqn = match package {
Some(pkg) => format!("{pkg}.{class_name}"),
None => class_name,
};
let mut i = 0usize;
while let Some(body) = node.child(i) {
i += 1;
if body.kind() != "class_body" {
continue;
}
let mut j = 0usize;
while let Some(member) = body.child(j) {
j += 1;
match member.kind() {
"constructor_declaration" => {
if let Some(params) = member.child_by_field_name("parameters") {
let mut k = 0usize;
while let Some(p) = params.child(k) {
k += 1;
if p.kind() == "formal_parameter" {
if let Some(tn) = p.child_by_field_name("type") {
if let Some(fqn) =
java_resolve_type_fqn(tn, source, package, &import_map)
{
out.push((class_fqn.clone(), fqn));
}
}
}
}
}
}
"field_declaration" => {
if !java_type_has_autowired(member, source) {
continue;
}
let type_node = member
.child_by_field_name("type")
.or_else(|| {
let mut k = 0usize;
while let Some(ch) = member.child(k) {
if matches!(
ch.kind(),
"type_identifier"
| "generic_type"
| "array_type"
| "integral_type"
| "floating_point_type"
| "boolean_type"
| "void_type"
) {
return Some(ch);
}
k += 1;
}
None
});
if let Some(tn) = type_node {
if let Some(fqn) = java_resolve_type_fqn(tn, source, package, &import_map)
{
out.push((class_fqn.clone(), fqn));
}
}
}
_ => {}
}
}
}
});
out
}
/// Finds the first C# namespace name with a purely textual, line-based scan.
///
/// A line matches when, ignoring leading whitespace, it starts with the
/// `namespace` keyword followed by at least one whitespace character. The
/// name is the text after that whitespace, up to the first `{`, `;` or
/// whitespace. Lines with an empty name are skipped and the scan continues.
fn extract_csharp_namespace_line_fallback(source: &str) -> Option<String> {
    source.lines().find_map(|line| {
        let rest = line.trim_start().strip_prefix("namespace")?;
        // Require a separator so identifiers such as `namespaceFoo` do not match.
        if !rest.starts_with(char::is_whitespace) {
            return None;
        }
        // Skip every separator character, not just a single space, so
        // `namespace  Foo` and `namespace\tFoo` are recognized too.
        let name = rest
            .trim_start()
            .split(|c: char| c == '{' || c == ';' || c.is_whitespace())
            .next()?;
        if name.is_empty() {
            None
        } else {
            Some(name.to_string())
        }
    })
}
/// Reads the file's namespace from the top-level nodes of the syntax tree.
///
/// A file-scoped namespace declaration takes precedence over a block
/// `namespace_declaration`, regardless of order. The name of the first
/// matching declaration is returned (which may itself be `None`).
fn extract_csharp_namespace_from_ast(tree: &Tree, source: &str) -> Option<String> {
    let root = tree.root_node();
    for wanted in ["file_scoped_namespace_declaration", "namespace_declaration"] {
        for idx in 0..root.named_child_count() {
            let child = root.named_child(idx)?;
            if child.kind() == wanted {
                return csharp_namespace_declaration_name(child, source);
            }
        }
    }
    None
}
/// Returns the text of the `name` field of a namespace declaration node.
///
/// Yields `None` for any other node kind, when the field is absent, or when
/// the name text is empty.
fn csharp_namespace_declaration_name(decl: Node, source: &str) -> Option<String> {
    match decl.kind() {
        "namespace_declaration" | "file_scoped_namespace_declaration" => {
            let name_node = decl.child_by_field_name("name")?;
            let text = csharp_node_text(name_node, source)?;
            if text.is_empty() {
                None
            } else {
                Some(text)
            }
        }
        _ => None,
    }
}
fn csharp_enclosing_namespace_prefix(node: Node, source: &str) -> Option<String> {
let mut segments: Vec<String> = Vec::new();
let mut cur = node.parent();
while let Some(p) = cur {
if matches!(
p.kind(),
"namespace_declaration" | "file_scoped_namespace_declaration"
) {
if let Some(name) = csharp_namespace_declaration_name(p, source) {
segments.push(name);
}
}
cur = p.parent();
}
segments.reverse();
if segments.is_empty() {
None
} else {
Some(segments.join("."))
}
}
/// Lists, sorted and without duplicates, every namespace that encloses a type
/// declaration in the file.
///
/// When no type declaration sits inside a namespace, falls back to the
/// AST-level file namespace and then to the textual line scan.
fn collect_csharp_file_namespace_strings(tree: &Tree, source: &str) -> Vec<String> {
    let mut namespaces: HashSet<String> = HashSet::new();
    walk_tree(tree.root_node(), |decl| {
        let is_type_declaration = matches!(
            decl.kind(),
            "class_declaration"
                | "interface_declaration"
                | "struct_declaration"
                | "enum_declaration"
                | "record_declaration"
                | "record_struct_declaration"
        );
        if !is_type_declaration {
            return;
        }
        if let Some(prefix) = csharp_enclosing_namespace_prefix(decl, source) {
            namespaces.insert(prefix);
        }
    });
    if namespaces.is_empty() {
        let fallback = extract_csharp_namespace_from_ast(tree, source)
            .or_else(|| extract_csharp_namespace_line_fallback(source));
        namespaces.extend(fallback);
    }
    let mut sorted: Vec<String> = namespaces.into_iter().collect();
    sorted.sort();
    sorted
}
/// Determines the file-level namespace: the syntax tree is consulted first and
/// the textual line scan is used only when the tree yields nothing.
fn extract_csharp_namespace(tree: &Tree, source: &str) -> Option<String> {
    if let Some(from_ast) = extract_csharp_namespace_from_ast(tree, source) {
        return Some(from_ast);
    }
    extract_csharp_namespace_line_fallback(source)
}
/// Classification of a single C# `using` directive, as produced by
/// `parse_csharp_using_directive`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum CSharpUsingKind {
/// Plain namespace import; holds the namespace text with `global::` removed.
Namespace(String),
/// `using static ...;`; holds the target text with `global::` removed.
Static(String),
/// `using <alias> = <target>;`; `target` has `global::` removed.
Alias { alias: String, target: String },
}
/// Aggregated `using` information for one file, built by
/// `extract_csharp_using_summary`.
#[derive(Debug, Clone, Default)]
struct CSharpUsingSummary {
/// Imported namespaces in the order the tree walk encounters them, without
/// duplicates and without `System` / `Microsoft` rooted namespaces.
namespace_imports: Vec<String>,
/// Alias name -> alias target; a later directive reusing an alias overwrites
/// the earlier entry.
alias_map: HashMap<String, String>,
}
/// Removes every `global::` qualifier from `s` (not only a leading one) and
/// trims surrounding whitespace from the result.
fn normalize_csharp_global_prefix(s: &str) -> String {
    let without_qualifier: String = s.split("global::").collect();
    without_qualifier.trim().to_owned()
}
/// Reports whether the first dotted segment of `ns` is exactly `System` or
/// `Microsoft`, ignoring surrounding whitespace and one leading `global::`.
fn is_csharp_system_or_microsoft_namespace(ns: &str) -> bool {
    let trimmed = ns.trim();
    let unrooted = match trimmed.strip_prefix("global::") {
        Some(rest) => rest,
        None => trimmed,
    }
    .trim();
    let root_segment = unrooted.split('.').next().unwrap_or("");
    root_segment == "System" || root_segment == "Microsoft"
}
/// Classifies the source text of a `using` directive node.
///
/// The text must end in `;` and, after an optional `global` modifier, start
/// with `using`; otherwise `None` is returned. Recognised forms, in order:
/// `using static <target>`, `using <alias> = <target>` (alias made only of
/// alphanumerics and `_`), and a plain namespace import. `global::` qualifiers
/// are stripped from targets and namespaces. Empty targets yield `None`.
fn parse_csharp_using_directive(node: Node, source: &str) -> Option<CSharpUsingKind> {
    let span = (node.start_byte() as usize)..(node.end_byte() as usize);
    let text = source.get(span)?.trim();
    let without_semicolon = text.strip_suffix(';')?.trim();

    // `global using ...` — drop the modifier only when it is a separate word.
    let after_global = match without_semicolon.strip_prefix("global") {
        Some(rest) if rest.starts_with(char::is_whitespace) => rest.trim_start(),
        _ => without_semicolon,
    };
    let directive = after_global.strip_prefix("using")?.trim();

    match directive.strip_prefix("static") {
        Some(rest) if rest.starts_with(char::is_whitespace) => {
            let target = normalize_csharp_global_prefix(rest.trim_start());
            return (!target.is_empty()).then(|| CSharpUsingKind::Static(target));
        }
        _ => {}
    }

    if let Some((lhs, rhs)) = directive.split_once('=') {
        let alias = lhs.trim();
        let target = rhs.trim();
        let alias_is_identifier = alias.chars().all(|c| c.is_alphanumeric() || c == '_');
        if !alias.is_empty() && !target.is_empty() && alias_is_identifier {
            return Some(CSharpUsingKind::Alias {
                alias: alias.to_string(),
                target: normalize_csharp_global_prefix(target),
            });
        }
    }

    let namespace = normalize_csharp_global_prefix(directive);
    (!namespace.is_empty()).then(|| CSharpUsingKind::Namespace(namespace))
}
fn extract_csharp_using_summary(tree: &Tree, source: &str) -> CSharpUsingSummary {
let mut namespace_imports: Vec<String> = Vec::new();
let mut seen_ns: HashSet<String> = HashSet::new();
let mut alias_map: HashMap<String, String> = HashMap::new();
walk_tree(tree.root_node(), |node| {
if node.kind() != "using_directive" {
return;
}
let Some(kind) = parse_csharp_using_directive(node, source) else {
return;
};
match kind {
CSharpUsingKind::Namespace(ns) => {
if !is_csharp_system_or_microsoft_namespace(&ns) && seen_ns.insert(ns.clone()) {
namespace_imports.push(ns);
}
}
CSharpUsingKind::Static(_) => {}
CSharpUsingKind::Alias { alias, target } => {
alias_map.insert(alias, target);
}
}
});
CSharpUsingSummary {
namespace_imports,
alias_map,
}
}
/// Cross-file lookup tables for the C# sources of one batch, built by
/// `build_csharp_batch_index`.
#[derive(Debug, Default, Clone)]
struct CSharpBatchIndex {
/// Namespace -> sorted, deduplicated path strings of the files associated with it.
namespace_to_paths: HashMap<String, Vec<String>>,
/// Simple type name -> sorted, deduplicated FQNs of the types carrying that name.
simple_name_to_fqns: HashMap<String, Vec<String>>,
/// FQN of every type symbol extracted from the batch.
class_fqns: HashSet<String>,
}
fn build_csharp_batch_index(files: &[ParsedFile], root: &Path) -> CSharpBatchIndex {
let mut namespace_to_paths: HashMap<String, Vec<String>> = HashMap::new();
let mut simple_name_to_fqns: HashMap<String, Vec<String>> = HashMap::new();
let mut class_fqns: HashSet<String> = HashSet::new();
for file in files {
if file.language != LanguageId::CSharp {
continue;
}
let path_str = neo4j_path_string(root, &file.path);
let source = &file.source;
let tree = &file.tree;
for ns in collect_csharp_file_namespace_strings(tree, source) {
namespace_to_paths
.entry(ns)
.or_default()
.push(path_str.clone());
}
let (classes, _, _) = extract_csharp_symbols(tree, source);
for c in classes {
class_fqns.insert(c.fqn.clone());
simple_name_to_fqns
.entry(c.name.clone())
.or_default()
.push(c.fqn.clone());
}
}
for v in namespace_to_paths.values_mut() {
v.sort();
v.dedup();
}
for v in simple_name_to_fqns.values_mut() {
v.sort();
v.dedup();
}
CSharpBatchIndex {
namespace_to_paths,
simple_name_to_fqns,
class_fqns,
}
}
/// Returns the namespaces to treat as imported: the explicit namespace
/// imports plus every alias target that is neither a known type FQN nor a
/// `System` / `Microsoft` namespace. The result is sorted and deduplicated.
fn csharp_effective_import_namespaces(
    namespace_imports: &[String],
    alias_map: &HashMap<String, String>,
    class_fqns: &HashSet<String>,
) -> Vec<String> {
    let alias_namespaces = alias_map
        .values()
        .filter(|target| {
            !class_fqns.contains(*target)
                && !is_csharp_system_or_microsoft_namespace(target.as_str())
        })
        .cloned();
    let mut combined: Vec<String> = namespace_imports
        .iter()
        .cloned()
        .chain(alias_namespaces)
        .collect();
    combined.sort();
    combined.dedup();
    combined
}
/// Resolves a simple type name to a fully-qualified name using the batch index.
///
/// Resolution order:
/// 1. an alias named `simple` whose target is a known type FQN;
/// 2. indexed candidates `<ns>.<simple>` whose `<ns>` is the current namespace
///    or one of the effective imports — a single hit wins, otherwise a hit in
///    the current namespace, otherwise the lexicographically smallest;
/// 3. with no candidate, `<current_ns>.<simple>` (or `None` without a
///    current namespace).
fn resolve_csharp_type_fqn(
    simple: &str,
    current_ns: Option<&str>,
    namespace_imports: &[String],
    alias_map: &HashMap<String, String>,
    index: &CSharpBatchIndex,
) -> Option<String> {
    let aliased_type = alias_map
        .get(simple)
        .filter(|target| index.class_fqns.contains(*target));
    if let Some(target) = aliased_type {
        return Some(target.clone());
    }

    let visible_namespaces =
        csharp_effective_import_namespaces(namespace_imports, alias_map, &index.class_fqns);
    let is_visible = |fqn: &String| -> bool {
        match fqn.rsplit_once('.') {
            Some((decl_ns, leaf)) if leaf == simple => {
                Some(decl_ns) == current_ns
                    || visible_namespaces.iter().any(|ns| ns.as_str() == decl_ns)
            }
            _ => false,
        }
    };
    let mut visible: Vec<String> = index
        .simple_name_to_fqns
        .get(simple)
        .map(|all| all.iter().filter(|fqn| is_visible(fqn)).cloned().collect())
        .unwrap_or_default();

    match visible.len() {
        0 => current_ns.map(|ns| format!("{ns}.{simple}")),
        1 => visible.pop(),
        _ => {
            // Ambiguous: prefer the declaration living in the current namespace.
            let same_namespace = current_ns.and_then(|ns| {
                visible
                    .iter()
                    .find(|fqn| fqn.rsplit_once('.').map(|(decl_ns, _)| decl_ns) == Some(ns))
                    .cloned()
            });
            same_namespace.or_else(|| {
                visible.sort();
                visible.into_iter().next()
            })
        }
    }
}
/// Maps a handful of frequently used `System` type names to their FQN.
/// Matching is exact and case-sensitive; unknown names yield `None`.
fn csharp_well_known_static_type(name: &str) -> Option<&'static str> {
    const WELL_KNOWN: [(&str, &str); 5] = [
        ("Console", "System.Console"),
        ("String", "System.String"),
        ("Math", "System.Math"),
        ("Object", "System.Object"),
        ("Environment", "System.Environment"),
    ];
    WELL_KNOWN
        .iter()
        .find(|(simple, _)| *simple == name)
        .map(|&(_, fqn)| fqn)
}
/// Returns the trimmed source text covered by `node`, or `None` when the
/// node's byte range is not a valid slice of `source`.
fn csharp_node_text(node: Node, source: &str) -> Option<String> {
    let span = (node.start_byte() as usize)..(node.end_byte() as usize);
    let slice = source.get(span)?;
    Some(slice.trim().to_string())
}
/// Reduces the textual form of a type reference to the simple name used for
/// index lookups.
///
/// Everything from the first generic argument list (`<`), array rank (`[`),
/// nullable marker (`?`) or pointer marker (`*`) onwards is dropped, then any
/// namespace (`.`) or alias (`::`) qualification is removed, so that
/// `A.B.Foo<T>[]`, `Foo?` and `global::Foo` all reduce to `Foo`.
fn csharp_type_text_to_simple_for_resolve(type_text: &str) -> String {
    // Without stripping the suffixes, `Foo?` / `Foo[]` would never match the
    // simple names stored in the index.
    let base = type_text
        .split(|c: char| matches!(c, '<' | '[' | '?' | '*'))
        .next()
        .unwrap_or(type_text)
        .trim();
    let after_dot = base.rsplit('.').next().unwrap_or(base);
    let leaf = after_dot.rsplit("::").next().unwrap_or(after_dot);
    leaf.trim().to_string()
}
fn csharp_resolve_type_node_to_fqn(
type_node: Node,
source: &str,
namespace: Option<&str>,
using_summary: &CSharpUsingSummary,
index: &CSharpBatchIndex,
) -> Option<String> {
let raw = csharp_node_text(type_node, source)?;
if raw == "var" || raw.is_empty() {
return None;
}
let simple = csharp_type_text_to_simple_for_resolve(&raw);
if simple.is_empty() {
return None;
}
resolve_csharp_type_fqn(
&simple,
namespace,
&using_summary.namespace_imports,
&using_summary.alias_map,
index,
)
}
fn csharp_variable_declaration_child(node: Node) -> Option<Node> {
for i in 0..node.child_count() {
if let Some(c) = node.child(i) {
if c.kind() == "variable_declaration" {
return Some(c);
}
}
}
None
}
fn csharp_collect_variable_declaration_bindings(
var_decl: Node,
source: &str,
namespace: Option<&str>,
using_summary: &CSharpUsingSummary,
index: &CSharpBatchIndex,
out: &mut HashMap<String, String>,
) {
let Some(type_node) = var_decl.child_by_field_name("type") else {
return;
};
let Some(type_fqn) =
csharp_resolve_type_node_to_fqn(type_node, source, namespace, using_summary, index)
else {
return;
};
for i in 0..var_decl.child_count() {
let Some(child) = var_decl.child(i) else {
continue;
};
if child.kind() != "variable_declarator" {
continue;
}
let Some(id) = child.child(0).filter(|c| c.kind() == "identifier") else {
continue;
};
if let Some(name) = csharp_node_text(id, source) {
out.insert(name, type_fqn.clone());
}
}
}
/// Maps each field declared directly in a type declaration's body to the FQN
/// of its declared type.
///
/// Returns an empty map when `type_decl` is not a type declaration or has no
/// `body` field. Only `field_declaration` members are considered.
fn csharp_collect_fields_for_type_declaration(
    type_decl: Node,
    source: &str,
    namespace: Option<&str>,
    using_summary: &CSharpUsingSummary,
    index: &CSharpBatchIndex,
) -> HashMap<String, String> {
    let mut field_types = HashMap::new();
    let is_type_declaration = matches!(
        type_decl.kind(),
        "class_declaration"
            | "interface_declaration"
            | "struct_declaration"
            | "enum_declaration"
            | "record_declaration"
            | "record_struct_declaration"
    );
    let body = if is_type_declaration {
        type_decl.child_by_field_name("body")
    } else {
        None
    };
    let Some(body) = body else {
        return field_types;
    };
    for idx in 0..body.child_count() {
        let var_decl = body
            .child(idx)
            .filter(|member| member.kind() == "field_declaration")
            .and_then(|member| csharp_variable_declaration_child(member));
        if let Some(var_decl) = var_decl {
            csharp_collect_variable_declaration_bindings(
                var_decl,
                source,
                namespace,
                using_summary,
                index,
                &mut field_types,
            );
        }
    }
    field_types
}
/// Maps every local variable declared anywhere under `scope_root` to the FQN
/// of its declared type. Later declarations of the same name overwrite
/// earlier ones.
fn csharp_collect_locals_in_scope(
    scope_root: Node,
    source: &str,
    namespace: Option<&str>,
    using_summary: &CSharpUsingSummary,
    index: &CSharpBatchIndex,
) -> HashMap<String, String> {
    let mut local_types: HashMap<String, String> = HashMap::new();
    walk_tree(scope_root, |stmt| {
        if stmt.kind() == "local_declaration_statement" {
            if let Some(var_decl) = csharp_variable_declaration_child(stmt) {
                csharp_collect_variable_declaration_bindings(
                    var_decl,
                    source,
                    namespace,
                    using_summary,
                    index,
                    &mut local_types,
                );
            }
        }
    });
    local_types
}
/// Translates a type-declaration node kind into the short kind label stored
/// on the symbol. Any kind not listed is labelled `class`.
fn csharp_type_declaration_kind_str(decl_kind: &str) -> &'static str {
    match decl_kind {
        "struct_declaration" | "record_struct_declaration" => "struct",
        "record_declaration" => "record",
        "interface_declaration" => "interface",
        "enum_declaration" => "enum",
        _ => "class",
    }
}
fn csharp_fqn_for_type_declaration(
decl: Node,
source: &str,
legacy_file_namespace: Option<&str>,
) -> Option<String> {
let leaf_name = identifier_text_from_children(decl, source)?;
let mut segments = vec![leaf_name];
let mut cur = decl.parent();
while let Some(p) = cur {
let pk = p.kind();
if matches!(
pk,
"class_declaration"
| "interface_declaration"
| "struct_declaration"
| "enum_declaration"
| "record_declaration"
| "record_struct_declaration"
) {
if let Some(n) = identifier_text_from_children(p, source) {
segments.push(n);
}
}
cur = p.parent();
}
segments.reverse();
let qualified = segments.join(".");
let ns = csharp_enclosing_namespace_prefix(decl, source)
.or_else(|| legacy_file_namespace.map(|s| s.to_string()));
Some(match ns {
Some(n) => format!("{n}.{qualified}"),
None => qualified,
})
}
fn csharp_innermost_enclosing_type_declaration(from: Node) -> Option<Node> {
let mut cur = from.parent();
while let Some(p) = cur {
let pk = p.kind();
if matches!(
pk,
"class_declaration"
| "interface_declaration"
| "struct_declaration"
| "enum_declaration"
| "record_declaration"
| "record_struct_declaration"
) {
return Some(p);
}
cur = p.parent();
}
None
}
/// Builds the synthetic FQN of an instance constructor:
/// `<class_fqn>.ctor#<arity>`.
fn csharp_constructor_function_fqn(class_fqn: &str, arity: usize) -> String {
    let mut fqn = String::from(class_fqn);
    fqn.push_str(".ctor#");
    fqn.push_str(&arity.to_string());
    fqn
}
/// Returns `(name, fqn)` for a constructor declaration.
///
/// A constructor carrying the `static` modifier is named `cctor`; any other
/// constructor is named `ctor#<arity>`, where the arity is the number of
/// parameter types reported by `csharp_method_parameter_types`.
fn csharp_constructor_symbol_fqn(ctor: Node, class_fqn: &str, source: &str) -> (String, String) {
    let is_static = csharp_collect_method_modifiers(ctor, source)
        .iter()
        .any(|modifier| modifier == "static");
    if is_static {
        ("cctor".to_string(), format!("{class_fqn}.cctor"))
    } else {
        let arity = csharp_method_parameter_types(ctor, source).len();
        (
            format!("ctor#{arity}"),
            csharp_constructor_function_fqn(class_fqn, arity),
        )
    }
}
/// Returns the method's name: the non-empty text of its `name` field, or
/// otherwise whatever `identifier_text_from_children` finds.
fn csharp_method_simple_name(method: Node, source: &str) -> Option<String> {
    let from_name_field = method
        .child_by_field_name("name")
        .and_then(|name_node| csharp_node_text(name_node, source))
        .filter(|text| !text.is_empty());
    match from_name_field {
        Some(name) => Some(name),
        None => identifier_text_from_children(method, source),
    }
}
fn csharp_collect_method_modifiers(method: Node, source: &str) -> Vec<String> {
let mut out = Vec::new();
for i in 0..method.child_count() {
let Some(c) = method.child(i) else {
continue;
};
if c.kind() == "modifier" {
if let Some(t) = csharp_node_text(c, source) {
if !t.is_empty() {
out.push(t);
}
}
}
}
out
}
/// Returns the node holding the method's return type: the `returns` field
/// when present, otherwise the `type` field.
fn csharp_method_return_type_node(method: Node) -> Option<Node> {
    match method.child_by_field_name("returns") {
        Some(returns) => Some(returns),
        None => method.child_by_field_name("type"),
    }
}
/// Returns the method's return type as text; a `void_keyword` node is
/// reported as the literal `void`.
fn csharp_method_return_type_string(method: Node, source: &str) -> Option<String> {
    let return_node = csharp_method_return_type_node(method)?;
    match return_node.kind() {
        "void_keyword" => Some("void".to_string()),
        _ => csharp_node_text(return_node, source),
    }
}
fn csharp_method_parameter_types(method: Node, source: &str) -> Vec<String> {
let mut out = Vec::new();
let Some(params) = method.child_by_field_name("parameters") else {
return out;
};
for i in 0..params.child_count() {
let Some(p) = params.child(i) else {
continue;
};
if p.kind() != "parameter" {
continue;
}
let Some(ty) = p.child_by_field_name("type") else {
continue;
};
if let Some(s) = csharp_node_text(ty, source) {
if !s.is_empty() {
out.push(s);
}
}
}
out
}
/// Returns the FQN of the innermost type declaration enclosing `node`, or
/// `None` when `node` is not inside a type declaration.
fn csharp_enclosing_type_fqn(node: Node, source: &str, namespace: Option<&str>) -> Option<String> {
    csharp_innermost_enclosing_type_declaration(node)
        .and_then(|owner| csharp_fqn_for_type_declaration(owner, source, namespace))
}
fn csharp_block_body(node: Node) -> Option<Node> {
for i in 0..node.child_count() {
if let Some(c) = node.child(i) {
if c.kind() == "block" {
return Some(c);
}
}
}
None
}
/// Returns the name of the property an accessor belongs to.
///
/// The accessor's grandparent must be a `property_declaration`; its `name`
/// field text is returned.
fn csharp_property_name_for_accessor(acc: Node, source: &str) -> Option<String> {
    let property = acc
        .parent()
        .and_then(|accessor_list| accessor_list.parent())
        .filter(|candidate| candidate.kind() == "property_declaration")?;
    property
        .child_by_field_name("name")
        .and_then(|name_node| csharp_node_text(name_node, source))
}
/// Determines the accessor keyword by scanning the accessor's direct children
/// for the first one whose text is `get`, `set` or `init`. Falls back to
/// `accessor` when none is found.
fn csharp_accessor_kind_prefix(acc: Node, source: &str) -> &'static str {
    (0..acc.child_count())
        .filter_map(|idx| acc.child(idx))
        .filter_map(|child| csharp_node_text(child, source))
        .find_map(|text| match text.as_str() {
            "get" => Some("get"),
            "set" => Some("set"),
            "init" => Some("init"),
            _ => None,
        })
        .unwrap_or("accessor")
}
/// Extracts the bare method name from a name node.
///
/// - `identifier`: the node text;
/// - `generic_name`: the text of its first child, which must be an `identifier`;
/// - anything else: the node text reduced by
///   `csharp_type_text_to_simple_for_resolve`.
fn csharp_method_name_from_simple_name_node(node: Node, source: &str) -> Option<String> {
    let kind = node.kind();
    if kind == "identifier" {
        return csharp_node_text(node, source);
    }
    if kind == "generic_name" {
        let head = node.child(0)?;
        if head.kind() != "identifier" {
            return None;
        }
        return csharp_node_text(head, source);
    }
    csharp_node_text(node, source).map(|text| csharp_type_text_to_simple_for_resolve(&text))
}
/// Peels nested `parenthesized_expression` wrappers by repeatedly stepping
/// into the child at index 1. Stops at the first non-parenthesized node or
/// when that child is absent.
fn csharp_unwrap_parenthesized_invoke_expr(mut expr: Node) -> Node {
    loop {
        if expr.kind() != "parenthesized_expression" {
            return expr;
        }
        match expr.child(1) {
            Some(inner) => expr = inner,
            None => return expr,
        }
    }
}
/// Resolves the receiver of a member access to a type FQN.
///
/// Chained member accesses are first reduced to their innermost `expression`.
/// `this` / `base` resolve to `class_fqn`. An identifier is looked up, in
/// order, among locals, fields, the well-known static types, and finally as a
/// type name through the batch index. Other receiver kinds yield `None`.
fn csharp_resolve_member_receiver_to_type_fqn(
    mut receiver: Node,
    source: &str,
    class_fqn: Option<&str>,
    field_map: &HashMap<String, String>,
    local_map: &HashMap<String, String>,
    namespace: Option<&str>,
    using_summary: &CSharpUsingSummary,
    index: &CSharpBatchIndex,
) -> Option<String> {
    while receiver.kind() == "member_access_expression" {
        receiver = receiver.child_by_field_name("expression")?;
    }
    let kind = receiver.kind();
    if matches!(kind, "this_expression" | "base_expression" | "this" | "base") {
        return class_fqn.map(str::to_owned);
    }
    if kind != "identifier" {
        return None;
    }
    let ident = csharp_node_text(receiver, source)?;
    // Locals shadow fields.
    let bound_type = local_map.get(&ident).or_else(|| field_map.get(&ident));
    if let Some(ty) = bound_type {
        return Some(ty.clone());
    }
    match csharp_well_known_static_type(&ident) {
        Some(well_known) => Some(well_known.to_string()),
        None => resolve_csharp_type_fqn(
            &ident,
            namespace,
            &using_summary.namespace_imports,
            &using_summary.alias_map,
            index,
        ),
    }
}
/// Resolves the `function` expression of an invocation to a callee FQN.
///
/// Surrounding parentheses are removed first. A bare (possibly generic) name
/// is qualified with the enclosing class, else the namespace, else left bare.
/// A member access is qualified with the resolved receiver type. Any other
/// expression kind yields `None`.
fn csharp_resolve_invocation_callee_fqn(
    fn_expr: Node,
    source: &str,
    class_fqn: Option<&str>,
    field_map: &HashMap<String, String>,
    local_map: &HashMap<String, String>,
    namespace: Option<&str>,
    using_summary: &CSharpUsingSummary,
    index: &CSharpBatchIndex,
) -> Option<String> {
    let callee = csharp_unwrap_parenthesized_invoke_expr(fn_expr);
    match callee.kind() {
        "identifier" | "generic_name" => {
            let name = csharp_method_name_from_simple_name_node(callee, source)?;
            Some(match class_fqn.or(namespace) {
                Some(owner) => format!("{owner}.{name}"),
                None => name,
            })
        }
        "member_access_expression" => {
            let name_node = callee.child_by_field_name("name")?;
            let method_name = csharp_method_name_from_simple_name_node(name_node, source)?;
            let receiver = callee.child_by_field_name("expression")?;
            let receiver_type = csharp_resolve_member_receiver_to_type_fqn(
                receiver,
                source,
                class_fqn,
                field_map,
                local_map,
                namespace,
                using_summary,
                index,
            )?;
            Some(format!("{receiver_type}.{method_name}"))
        }
        _ => None,
    }
}
/// Appends a `(caller_fqn, callee_fqn)` pair to `calls` for every invocation
/// under `body` whose callee can be resolved.
///
/// Local variable types are collected once for the whole body before the walk.
fn csharp_collect_calls_from_body(
    body: Node,
    caller_fqn: &str,
    class_fqn: Option<&str>,
    field_map: &HashMap<String, String>,
    source: &str,
    namespace: Option<&str>,
    using_summary: &CSharpUsingSummary,
    index: &CSharpBatchIndex,
    calls: &mut Vec<(String, String)>,
) {
    let local_types =
        csharp_collect_locals_in_scope(body, source, namespace, using_summary, index);
    walk_tree(body, |expr| {
        if expr.kind() != "invocation_expression" {
            return;
        }
        let resolved = expr.child_by_field_name("function").and_then(|target| {
            csharp_resolve_invocation_callee_fqn(
                target,
                source,
                class_fqn,
                field_map,
                &local_types,
                namespace,
                using_summary,
                index,
            )
        });
        if let Some(callee_fqn) = resolved {
            calls.push((caller_fqn.to_string(), callee_fqn));
        }
    });
}
/// Walks the whole syntax tree and collects the C# symbols declared in it.
///
/// Returns `(classes, methods, properties)`:
/// - one `ClassSymbol` per distinct type-declaration FQN (class, interface,
///   struct, enum, record, record struct);
/// - one `FunctionSymbol` per method declaration, per constructor declaration
///   and per property accessor declaration;
/// - one `PropertySymbol` per distinct property FQN.
///
/// The file-level namespace from `extract_csharp_namespace` is passed to the
/// FQN helpers as the fallback namespace.
fn extract_csharp_symbols(
tree: &Tree,
source: &str,
) -> (
Vec<ClassSymbol>,
Vec<FunctionSymbol>,
Vec<PropertySymbol>,
) {
let mut classes: Vec<ClassSymbol> = Vec::new();
let mut methods: Vec<FunctionSymbol> = Vec::new();
let mut properties: Vec<PropertySymbol> = Vec::new();
// FQNs already emitted, so a repeated type or property is recorded only once.
let mut seen_class_fqns: HashSet<String> = HashSet::new();
let mut seen_property_fqns: HashSet<String> = HashSet::new();
let namespace = extract_csharp_namespace(tree, source);
let root = tree.root_node();
walk_tree(root, |node| {
let nk = node.kind();
match nk {
"class_declaration"
| "interface_declaration"
| "struct_declaration"
| "enum_declaration"
| "record_declaration"
| "record_struct_declaration" => {
let Some(name) = identifier_text_from_children(node, source) else {
return;
};
let Some(fqn) = csharp_fqn_for_type_declaration(node, source, namespace.as_deref()) else {
return;
};
if seen_class_fqns.insert(fqn.clone()) {
let kind = Some(csharp_type_declaration_kind_str(nk));
classes.push(ClassSymbol { name, fqn, kind });
}
}
"method_declaration" => {
let Some(method_name) = csharp_method_simple_name(node, source) else {
return;
};
let class_fqn = csharp_enclosing_type_fqn(node, source, namespace.as_deref());
// Qualify with the enclosing type, else the file namespace, else leave bare.
let fqn = if let Some(ref cls) = class_fqn {
format!("{cls}.{method_name}")
} else if let Some(ref ns) = namespace {
format!("{ns}.{method_name}")
} else {
method_name.clone()
};
let param_types = csharp_method_parameter_types(node, source);
let param_count = param_types.len();
let return_type = csharp_method_return_type_string(node, source);
let modifiers = csharp_collect_method_modifiers(node, source);
methods.push(FunctionSymbol {
name: method_name,
fqn,
class_fqn,
return_type,
param_types,
param_count,
modifiers,
is_pointer_receiver: None,
});
}
"constructor_declaration" => {
// Constructors without an enclosing type declaration are skipped.
let Some(class_fqn) =
csharp_enclosing_type_fqn(node, source, namespace.as_deref())
else {
return;
};
let (name, fqn) = csharp_constructor_symbol_fqn(node, &class_fqn, source);
let param_types = csharp_method_parameter_types(node, source);
let param_count = param_types.len();
let modifiers = csharp_collect_method_modifiers(node, source);
methods.push(FunctionSymbol {
name,
fqn,
class_fqn: Some(class_fqn),
return_type: None,
param_types,
param_count,
modifiers,
is_pointer_receiver: None,
});
}
"property_declaration" => {
let Some(class_fqn) =
csharp_enclosing_type_fqn(node, source, namespace.as_deref())
else {
return;
};
let Some(prop_name_node) = node.child_by_field_name("name") else {
return;
};
let Some(prop_name) = csharp_node_text(prop_name_node, source).filter(|s| !s.is_empty())
else {
return;
};
let prop_fqn = format!("{class_fqn}.{prop_name}");
if seen_property_fqns.insert(prop_fqn.clone()) {
let declared_type = node
.child_by_field_name("type")
.and_then(|t| csharp_node_text(t, source))
.filter(|s| !s.is_empty());
properties.push(PropertySymbol {
class_fqn: class_fqn.clone(),
name: prop_name.clone(),
fqn: prop_fqn,
declared_type,
});
}
// Accessors are emitted below even when the property FQN was already seen.
let prop_type_text = node
.child_by_field_name("type")
.and_then(|t| csharp_node_text(t, source))
.filter(|s| !s.is_empty());
let Some(accessor_list) = node.child_by_field_name("accessors") else {
return;
};
for i in 0..accessor_list.child_count() {
let Some(acc) = accessor_list.child(i) else {
continue;
};
if acc.kind() != "accessor_declaration" {
continue;
}
// Each accessor becomes a synthetic function named `<prefix>_<PropertyName>`.
let prefix = csharp_accessor_kind_prefix(acc, source);
let fn_name = format!("{prefix}_{prop_name}");
let fqn = format!("{class_fqn}.{fn_name}");
// `set` / `init` report `void`; every other accessor reports the property type.
let return_type = match prefix {
"get" => prop_type_text.clone(),
"set" | "init" => Some("void".to_string()),
_ => prop_type_text.clone(),
};
let param_types = csharp_method_parameter_types(acc, source);
let param_count = param_types.len();
let modifiers = csharp_collect_method_modifiers(acc, source);
methods.push(FunctionSymbol {
name: fn_name,
fqn,
class_fqn: Some(class_fqn.clone()),
return_type,
param_types,
param_count,
modifiers,
is_pointer_receiver: None,
});
}
}
_ => {}
}
});
(classes, methods, properties)
}
/// Invokes `f` once for each distinct type node referenced by `method`.
///
/// Type nodes are deduplicated by their `(start_byte, end_byte)` span through
/// `seen`, which the caller owns; `void_keyword` nodes are never reported.
/// The return type is offered first, then the nodes found while walking
/// `method` with `walk_tree_skip_local_functions` (presumably skipping nested
/// local functions, per its name — confirm against that helper).
fn csharp_for_each_unique_type_root(method: Node, seen: &mut HashSet<(usize, usize)>, f: &mut impl FnMut(Node)) {
// Forwards a candidate type node to `f` unless it is absent, `void`, or already seen.
let mut push = |n: Option<Node>| {
let Some(t) = n else {
return;
};
if t.kind() == "void_keyword" {
return;
}
let key = (t.start_byte() as usize, t.end_byte() as usize);
if seen.insert(key) {
f(t);
}
};
push(csharp_method_return_type_node(method));
walk_tree_skip_local_functions(method, |n| {
match n.kind() {
"local_declaration_statement" => {
if let Some(vd) = csharp_variable_declaration_child(n) {
push(vd.child_by_field_name("type"));
}
}
// These node kinds expose the referenced type through their `type` field.
"variable_declaration" | "parameter" | "object_creation_expression" | "cast_expression"
| "default_expression" => {
push(n.child_by_field_name("type"));
}
// For `as` / `is` the type is read from the `right` field.
"as_expression" | "is_expression" => {
push(n.child_by_field_name("right"));
}
"type_pattern" => {
if let Some(c) = n.named_child(0) {
push(Some(c));
}
}
_ => {}
}
});
}
/// Appends to `out` every simple type name mentioned inside a type node,
/// recursing through generic arguments, qualified names, nullable / pointer /
/// array wrappers and tuple elements.
///
/// `predefined_type` nodes contribute nothing. Node kinds not handled
/// explicitly are followed through their `type` field when they have one.
fn csharp_collect_simple_type_names_from_type_node(node: Node, source: &str, out: &mut Vec<String>) {
    let direct_children = || (0..node.child_count()).filter_map(move |idx| node.child(idx));
    match node.kind() {
        "type_identifier" | "identifier" => out.extend(csharp_node_text(node, source)),
        "generic_name" => {
            for child in direct_children() {
                match child.kind() {
                    "identifier" => out.extend(csharp_node_text(child, source)),
                    "type_argument_list" => {
                        let arguments = (0..child.named_child_count())
                            .filter_map(|idx| child.named_child(idx));
                        for argument in arguments {
                            csharp_collect_simple_type_names_from_type_node(argument, source, out);
                        }
                    }
                    _ => {}
                }
            }
        }
        "qualified_name" => {
            for child in direct_children() {
                let is_name_part = matches!(
                    child.kind(),
                    "qualified_name"
                        | "alias_qualified_name"
                        | "generic_name"
                        | "type_identifier"
                        | "identifier"
                );
                if is_name_part {
                    csharp_collect_simple_type_names_from_type_node(child, source, out);
                }
            }
        }
        "alias_qualified_name" => {
            for child in direct_children() {
                if matches!(child.kind(), "generic_name" | "identifier" | "type_identifier") {
                    csharp_collect_simple_type_names_from_type_node(child, source, out);
                }
            }
        }
        // Single-wrapper types: the wrapped type is the first named child.
        "nullable_type" | "pointer_type" => {
            if let Some(wrapped) = node.named_child(0) {
                csharp_collect_simple_type_names_from_type_node(wrapped, source, out);
            }
        }
        "tuple_type" => {
            for idx in 0..node.named_child_count() {
                let element_type = node
                    .named_child(idx)
                    .filter(|element| element.kind() == "tuple_element")
                    .and_then(|element| element.child_by_field_name("type"));
                if let Some(element_type) = element_type {
                    csharp_collect_simple_type_names_from_type_node(element_type, source, out);
                }
            }
        }
        "predefined_type" => {}
        // `array_type` and every remaining kind: follow the `type` field if present.
        _ => {
            if let Some(inner) = node.child_by_field_name("type") {
                csharp_collect_simple_type_names_from_type_node(inner, source, out);
            }
        }
    }
}
fn csharp_for_each_base_type_in_list(base_list: Node, mut f: impl FnMut(Node)) {
for i in 0..base_list.child_count() {
let Some(c) = base_list.child(i) else {
continue;
};
match c.kind() {
":" | "," => continue,
"argument_list" => continue,
_ => f(c),
}
}
}
fn csharp_type_declaration_base_list(decl: Node) -> Option<Node> {
decl.child_by_field_name("bases")
.filter(|b| !b.is_missing())
.or_else(|| {
(0..decl.named_child_count())
.filter_map(|i| decl.named_child(i))
.find(|c| c.kind() == "base_list")
})
}
/// Extracts `(derived_fqn, base_fqn)` pairs for every base type listed on a
/// class, interface, struct, record or record-struct declaration.
///
/// * `legacy_ns` — file-level namespace used when a declaration has no
///   lexically enclosing namespace.
/// * Base names are resolved through `resolve_csharp_type_fqn`; unresolved
///   names are qualified with the declaration's namespace, or left bare.
/// * Built-in / well-known framework types and self-references are skipped,
///   and each pair is reported once.
///
/// For a qualified base such as `A.B.Base<T>` or `global::Base`, the rightmost
/// name (`Base`) is the one resolved, not the leading qualifier.
fn extract_csharp_class_inheritance_edges(
    tree: &Tree,
    source: &str,
    legacy_ns: Option<&str>,
    using_summary: &CSharpUsingSummary,
    index: &CSharpBatchIndex,
) -> Vec<(String, String)> {
    let mut out = Vec::new();
    let mut seen: HashSet<(String, String)> = HashSet::new();
    let root = tree.root_node();
    walk_tree(root, |n| {
        // `record_struct_declaration` is included for parity with the other
        // type-declaration kind lists in this file; record structs may
        // implement interfaces too.
        if !matches!(
            n.kind(),
            "class_declaration"
                | "interface_declaration"
                | "struct_declaration"
                | "record_declaration"
                | "record_struct_declaration"
        ) {
            return;
        }
        let Some(derived_fqn) = csharp_fqn_for_type_declaration(n, source, legacy_ns) else {
            return;
        };
        let Some(bases_field) = csharp_type_declaration_base_list(n) else {
            return;
        };
        if bases_field.is_missing() {
            return;
        }
        let ns_for = csharp_enclosing_namespace_prefix(n, source)
            .or_else(|| legacy_ns.map(|s| s.to_string()));
        csharp_for_each_base_type_in_list(bases_field, |ty_node| {
            // Step past namespace / alias qualifiers so the first collected
            // name is the type itself rather than its leftmost qualifier.
            let mut named = ty_node;
            while matches!(named.kind(), "qualified_name" | "alias_qualified_name") {
                match named.child_by_field_name("name") {
                    Some(inner) => named = inner,
                    None => break,
                }
            }
            let mut names: Vec<String> = Vec::new();
            csharp_collect_simple_type_names_from_type_node(named, source, &mut names);
            let Some(simple) = names.first() else {
                return;
            };
            if is_csharp_builtin_type(simple) {
                return;
            }
            let base_fqn = resolve_csharp_type_fqn(
                simple,
                ns_for.as_deref(),
                &using_summary.namespace_imports,
                &using_summary.alias_map,
                index,
            )
            .unwrap_or_else(|| {
                ns_for
                    .as_ref()
                    .map(|ns| format!("{ns}.{simple}"))
                    .unwrap_or_else(|| simple.clone())
            });
            if base_fqn != derived_fqn && seen.insert((derived_fqn.clone(), base_fqn.clone())) {
                out.push((derived_fqn.clone(), base_fqn));
            }
        });
    });
    out
}
/// Heuristic for generic type parameters: true only for a name consisting of
/// exactly one ASCII uppercase letter (for example `T`).
fn csharp_is_likely_type_parameter_name(name: &str) -> bool {
    let mut chars = name.chars();
    let first = chars.next();
    let has_second = chars.next().is_some();
    !has_second && first.map_or(false, |c| c.is_ascii_uppercase())
}
/// Extracts `(method_fqn, type_fqn)` pairs describing which types each method
/// declaration references.
///
/// Skipped names: built-in / common framework types, names that look like a
/// generic type parameter (unless a `using` alias of that name exists), and
/// names that do not start with an uppercase character. Remaining names are
/// resolved through the batch index; unresolved names are qualified with the
/// method's namespace or left bare. Each pair is reported once.
fn extract_csharp_used_classes(
    tree: &Tree,
    source: &str,
    namespace: Option<&str>,
    using_summary: &CSharpUsingSummary,
    csharp_index: &CSharpBatchIndex,
) -> Vec<(String, String)> {
    let mut edges: Vec<(String, String)> = Vec::new();
    let mut emitted: HashSet<(String, String)> = HashSet::new();
    walk_tree(tree.root_node(), |method| {
        if method.kind() != "method_declaration" {
            return;
        }
        let Some(method_name) = csharp_method_simple_name(method, source) else {
            return;
        };
        let scope_ns: Option<String> = csharp_enclosing_namespace_prefix(method, source)
            .or_else(|| namespace.map(str::to_owned));
        let owner_fqn = csharp_enclosing_type_fqn(method, source, namespace);
        let method_fqn = match owner_fqn.as_deref().or(scope_ns.as_deref()) {
            Some(prefix) => format!("{prefix}.{method_name}"),
            None => method_name,
        };

        let mut visited_roots: HashSet<(usize, usize)> = HashSet::new();
        let mut referenced: Vec<String> = Vec::new();
        csharp_for_each_unique_type_root(method, &mut visited_roots, &mut |type_root| {
            csharp_collect_simple_type_names_from_type_node(type_root, source, &mut referenced);
        });

        for simple in referenced {
            if is_csharp_builtin_type(&simple) {
                continue;
            }
            let looks_like_type_parameter = csharp_is_likely_type_parameter_name(&simple)
                && !using_summary.alias_map.contains_key(&simple);
            let starts_uppercase = simple.chars().next().map_or(false, char::is_uppercase);
            if looks_like_type_parameter || !starts_uppercase {
                continue;
            }
            let resolved = resolve_csharp_type_fqn(
                &simple,
                scope_ns.as_deref(),
                &using_summary.namespace_imports,
                &using_summary.alias_map,
                csharp_index,
            );
            let type_fqn = match resolved {
                Some(fqn) => fqn,
                None => match scope_ns.as_ref() {
                    Some(ns) => format!("{ns}.{simple}"),
                    None => simple.clone(),
                },
            };
            if emitted.insert((method_fqn.clone(), type_fqn.clone())) {
                edges.push((method_fqn.clone(), type_fqn));
            }
        }
    });
    edges
}
/// Reports whether `name` is a C# built-in type keyword (plus the contextual
/// `var` / `dynamic`) or the simple CLR name of one of those types.
///
/// Matching is exact and case-sensitive. The unsigned / signed-byte keywords
/// (`uint`, `ulong`, `ushort`, `sbyte`) and the `IntPtr` / `UIntPtr` names
/// behind `nint` / `nuint` are included so both spellings of every built-in
/// type are covered.
fn is_csharp_primitive_or_alias(name: &str) -> bool {
    matches!(
        name,
        "int" | "long" | "short" | "byte" | "float" | "double" | "decimal"
            | "uint" | "ulong" | "ushort" | "sbyte"
            | "bool" | "char" | "string" | "object" | "void" | "dynamic" | "var"
            | "nint" | "nuint"
            | "Int32" | "Int64" | "Int16" | "UInt32" | "UInt64" | "UInt16"
            | "Byte" | "SByte" | "Single" | "Double" | "Decimal"
            | "IntPtr" | "UIntPtr"
            | "Boolean" | "Char" | "String" | "Object" | "Void"
    )
}
/// Reports whether `name` is one of a fixed list of commonly used .NET base
/// class library / framework type names. Matching is exact and
/// case-sensitive on the simple (unqualified) name.
fn is_csharp_common_bcl_or_framework_type(name: &str) -> bool {
    const COLLECTIONS: &[&str] = &[
        "List", "Dictionary", "IEnumerable", "IEnumerator", "IList", "ICollection",
        "IDictionary", "IReadOnlyList", "IReadOnlyCollection", "IReadOnlyDictionary", "ISet",
        "HashSet", "SortedSet", "Queue", "Stack", "LinkedList", "SortedList", "ConcurrentBag",
        "ConcurrentQueue", "ConcurrentStack", "ConcurrentDictionary", "ObservableCollection",
        "ImmutableArray", "ImmutableList", "ImmutableDictionary", "IOrderedEnumerable",
    ];
    const TASKS_AND_DELEGATES: &[&str] = &[
        "Task", "ValueTask", "CancellationToken", "CancellationTokenSource",
        "IAsyncEnumerable", "IAsyncEnumerator", "Parallel", "Thread", "Interlocked",
        "Func", "Action", "MulticastDelegate", "Delegate",
    ];
    const VALUES_TEXT_IO: &[&str] = &[
        "DateTime", "DateTimeOffset", "TimeSpan", "DateOnly", "TimeOnly", "Guid",
        "StringBuilder", "Encoding", "UTF8Encoding", "ASCIIEncoding", "UnicodeEncoding",
        "Stream", "MemoryStream", "FileStream", "BufferedStream", "TextReader", "TextWriter",
        "StringReader", "StringWriter", "BinaryReader", "BinaryWriter", "File", "Path",
        "Directory", "Environment",
    ];
    const RUNTIME: &[&str] = &[
        "Uri", "Version", "Type", "Enum", "Array", "Nullable", "Lazy", "Tuple",
        "ValueTuple", "Console", "Math", "Convert", "BitConverter", "GC", "WeakReference",
        "RuntimeHelpers", "Activator",
    ];
    const EXCEPTIONS: &[&str] = &[
        "Exception", "ArgumentException", "ArgumentNullException",
        "ArgumentOutOfRangeException", "InvalidOperationException", "NotSupportedException",
        "NotImplementedException", "IOException", "UnauthorizedAccessException",
        "TimeoutException", "AggregateException", "OperationCanceledException",
        "ObjectDisposedException", "FormatException",
    ];
    const MEMORY_AND_INTERFACES: &[&str] = &[
        "Span", "ReadOnlySpan", "Memory", "ReadOnlyMemory",
        "IDisposable", "IAsyncDisposable", "IComparable", "IEquatable", "IFormattable",
    ];
    const HOSTING_AND_HTTP: &[&str] = &[
        "IServiceProvider", "ILogger", "IConfiguration", "IHost", "IHostedService",
        "IHttpClientFactory", "HttpClient", "HttpRequestMessage", "HttpResponseMessage",
    ];
    let groups: [&[&str]; 7] = [
        COLLECTIONS,
        TASKS_AND_DELEGATES,
        VALUES_TEXT_IO,
        RUNTIME,
        EXCEPTIONS,
        MEMORY_AND_INTERFACES,
        HOSTING_AND_HTTP,
    ];
    groups.iter().any(|group| group.contains(&name))
}
/// Reports whether `name` should be treated as a built-in type: either a
/// primitive / alias name or one of the common framework type names.
fn is_csharp_builtin_type(name: &str) -> bool {
    if is_csharp_primitive_or_alias(name) {
        return true;
    }
    is_csharp_common_bcl_or_framework_type(name)
}
/// Collects call pairs from every method, constructor and accessor body in a
/// C# syntax tree.
///
/// The tree is walked twice:
/// 1. every type declaration is mapped (by FQN) to the field map produced by
///    `csharp_collect_fields_for_type_declaration`;
/// 2. every `method_declaration`, `constructor_declaration` and
///    `accessor_declaration` that has a block body is handed to
///    `csharp_collect_calls_from_body`, together with the field map of its
///    enclosing type (or an empty map when none was recorded).
///
/// `namespace` is the fallback namespace used whenever
/// `csharp_enclosing_namespace_prefix` finds none for a node.
///
/// Returns the pairs appended by `csharp_collect_calls_from_body`; the first
/// element of each pair is the caller FQN built here.
fn extract_csharp_calls(
    tree: &Tree,
    source: &str,
    namespace: Option<&str>,
    using_summary: &CSharpUsingSummary,
    csharp_index: &CSharpBatchIndex,
) -> Vec<(String, String)> {
    const TYPE_DECLARATION_KINDS: [&str; 6] = [
        "class_declaration",
        "interface_declaration",
        "struct_declaration",
        "enum_declaration",
        "record_declaration",
        "record_struct_declaration",
    ];
    let root = tree.root_node();

    // Pass 1: field maps keyed by the FQN of the declaring type.
    let mut fields_by_type: HashMap<String, HashMap<String, String>> = HashMap::new();
    walk_tree(root, |node| {
        if !TYPE_DECLARATION_KINDS.contains(&node.kind()) {
            return;
        }
        let Some(type_fqn) = csharp_fqn_for_type_declaration(node, source, namespace) else {
            return;
        };
        let type_ns = csharp_enclosing_namespace_prefix(node, source)
            .or_else(|| namespace.map(|s| s.to_string()));
        let fields = csharp_collect_fields_for_type_declaration(
            node,
            source,
            type_ns.as_deref(),
            using_summary,
            csharp_index,
        );
        fields_by_type.insert(type_fqn, fields);
    });

    // Pass 2: visit every callable body and gather its calls.
    let no_fields: HashMap<String, String> = HashMap::new();
    let mut calls: Vec<(String, String)> = Vec::new();
    walk_tree(root, |node| match node.kind() {
        "method_declaration" => {
            let Some(method_name) = csharp_method_simple_name(node, source) else {
                return;
            };
            let effective_ns = csharp_enclosing_namespace_prefix(node, source)
                .or_else(|| namespace.map(|s| s.to_string()));
            let owner = csharp_enclosing_type_fqn(node, source, namespace);
            // Prefer the enclosing type, then the namespace, then the bare name.
            let caller_fqn = if let Some(type_fqn) = &owner {
                format!("{type_fqn}.{method_name}")
            } else if let Some(ns) = &effective_ns {
                format!("{ns}.{method_name}")
            } else {
                method_name
            };
            let Some(body) = csharp_block_body(node) else {
                return;
            };
            let field_map = match owner.as_ref().and_then(|t| fields_by_type.get(t)) {
                Some(map) => map,
                None => &no_fields,
            };
            csharp_collect_calls_from_body(
                body,
                &caller_fqn,
                owner.as_deref(),
                field_map,
                source,
                effective_ns.as_deref(),
                using_summary,
                csharp_index,
                &mut calls,
            );
        }
        "constructor_declaration" => {
            // Constructors without an enclosing type are ignored.
            let Some(owner) = csharp_enclosing_type_fqn(node, source, namespace) else {
                return;
            };
            let effective_ns = csharp_enclosing_namespace_prefix(node, source)
                .or_else(|| namespace.map(|s| s.to_string()));
            let (_, caller_fqn) = csharp_constructor_symbol_fqn(node, &owner, source);
            let Some(body) = csharp_block_body(node) else {
                return;
            };
            let field_map = fields_by_type.get(&owner).unwrap_or(&no_fields);
            csharp_collect_calls_from_body(
                body,
                &caller_fqn,
                Some(owner.as_str()),
                field_map,
                source,
                effective_ns.as_deref(),
                using_summary,
                csharp_index,
                &mut calls,
            );
        }
        "accessor_declaration" => {
            // Accessors without an enclosing type or property name are ignored.
            let Some(owner) = csharp_enclosing_type_fqn(node, source, namespace) else {
                return;
            };
            let effective_ns = csharp_enclosing_namespace_prefix(node, source)
                .or_else(|| namespace.map(|s| s.to_string()));
            let Some(prop_name) = csharp_property_name_for_accessor(node, source) else {
                return;
            };
            let prefix = csharp_accessor_kind_prefix(node, source);
            let caller_fqn = format!("{owner}.{prefix}_{prop_name}");
            let Some(body) = csharp_block_body(node) else {
                return;
            };
            let field_map = fields_by_type.get(&owner).unwrap_or(&no_fields);
            csharp_collect_calls_from_body(
                body,
                &caller_fqn,
                Some(owner.as_str()),
                field_map,
                source,
                effective_ns.as_deref(),
                using_summary,
                csharp_index,
                &mut calls,
            );
        }
        _ => {}
    });
    calls
}
/// Returns the contents of a C# `string_literal` or `verbatim_string_literal`
/// node with the surrounding quotes removed.
///
/// For verbatim literals (`@"..."`) doubled quotes (`""`) are collapsed to a
/// single `"`. Escape sequences in regular literals are returned verbatim.
/// Any other node kind, a malformed literal, or a byte range that is not a
/// valid slice of `source` yields `None`.
fn csharp_string_value_from_literal_node(node: Node, source: &str) -> Option<String> {
    let raw = source.get(node.start_byte()..node.end_byte())?;
    let text = raw.trim();
    let kind = node.kind();
    if kind == "string_literal" {
        return text
            .strip_prefix('"')
            .and_then(|rest| rest.strip_suffix('"'))
            .map(|inner| inner.to_string());
    }
    if kind == "verbatim_string_literal" {
        return text
            .strip_prefix("@\"")
            .and_then(|rest| rest.strip_suffix('"'))
            .map(|inner| inner.replace("\"\"", "\""));
    }
    None
}
/// Finds the first string literal (in pre-order, left to right) under `root`
/// and returns its unquoted value.
///
/// The search stops at the first `string_literal` / `verbatim_string_literal`
/// node encountered, even when that literal cannot be decoded (in which case
/// `None` is returned).
fn csharp_first_string_in_subtree(root: Node, source: &str) -> Option<String> {
    let mut pending = vec![root];
    loop {
        let current = pending.pop()?;
        if matches!(current.kind(), "string_literal" | "verbatim_string_literal") {
            return csharp_string_value_from_literal_node(current, source);
        }
        // Push children right-to-left so the leftmost child is popped first.
        pending.extend(
            (0..current.child_count())
                .rev()
                .filter_map(|i| current.child(i)),
        );
    }
}
/// Returns the first string literal passed as a *positional* argument of a
/// C# `attribute` node.
///
/// Named property assignments (`Name = "..."`) are skipped: in
/// `[HttpGet(Name = "GetForecast")]` the string is the value of a named
/// property, not the first argument, so it must not be reported as one.
/// Named positional arguments written with a colon (`template: "x"`) are
/// still considered.
///
/// Returns `None` when the attribute has no argument list or no positional
/// argument containing a string literal.
fn csharp_first_string_in_attribute(attr: Node, source: &str) -> Option<String> {
    for i in 0..attr.child_count() {
        let Some(list) = attr.child(i) else {
            continue;
        };
        if list.kind() != "attribute_argument_list" {
            continue;
        }
        for j in 0..list.named_child_count() {
            let Some(arg) = list.named_child(j) else {
                continue;
            };
            if arg.kind() != "attribute_argument" {
                continue;
            }
            // A property assignment shows up as a direct `name_equals` child
            // (older grammars) or a direct `=` token (newer grammars).
            let is_property_assignment = (0..arg.child_count())
                .filter_map(|k| arg.child(k))
                .any(|c| matches!(c.kind(), "name_equals" | "="));
            if is_property_assignment {
                continue;
            }
            if let Some(value) = csharp_first_string_in_subtree(arg, source) {
                return Some(value);
            }
        }
    }
    None
}
fn csharp_attribute_simple_name_from_name_node(n: Node, source: &str) -> Option<String> {
match n.kind() {
"identifier" => csharp_node_text(n, source),
"generic_name" => {
let id = n.child(0).filter(|c| c.kind() == "identifier")?;
csharp_node_text(id, source)
}
"qualified_name" | "alias_qualified_name" => {
let mut last = None;
for i in 0..n.child_count() {
let Some(c) = n.child(i) else {
continue;
};
if !c.is_named() {
continue;
}
if let Some(s) = csharp_attribute_simple_name_from_name_node(c, source) {
last = Some(s);
}
}
last
}
_ => None,
}
}
/// Normalises a raw route template: surrounding whitespace is removed and a
/// leading `/` is added when missing. Blank input yields `None`.
fn csharp_route_path_from_str(path: &str) -> Option<String> {
    let trimmed = path.trim();
    match trimmed.chars().next() {
        None => None,
        Some('/') => Some(trimmed.to_owned()),
        Some(_) => Some(format!("/{trimmed}")),
    }
}
/// Returns the route template declared by a `[Route("...")]` attribute placed
/// directly on a type declaration.
///
/// Both `[Route(...)]` and the explicit `[RouteAttribute(...)]` spelling are
/// accepted, since C# treats them as the same attribute. When several route
/// attributes carry a string, the last one wins; a blank template resets the
/// result to `None`.
fn csharp_type_level_route_template(type_node: Node, source: &str) -> Option<String> {
    let mut last: Option<String> = None;
    for i in 0..type_node.child_count() {
        let Some(list) = type_node.child(i) else {
            continue;
        };
        if list.kind() != "attribute_list" {
            continue;
        }
        for j in 0..list.child_count() {
            let Some(attr) = list.child(j) else {
                continue;
            };
            if attr.kind() != "attribute" {
                continue;
            }
            let Some(name_n) = attr.child_by_field_name("name") else {
                continue;
            };
            let Some(full_name) = csharp_attribute_simple_name_from_name_node(name_n, source)
            else {
                continue;
            };
            // `[FooAttribute]` and `[Foo]` name the same attribute class.
            let simple = full_name
                .strip_suffix("Attribute")
                .filter(|s| !s.is_empty())
                .unwrap_or(full_name.as_str());
            if simple != "Route" {
                continue;
            }
            if let Some(raw) = csharp_first_string_in_attribute(attr, source) {
                last = csharp_route_path_from_str(&raw);
            }
        }
    }
    last
}
fn csharp_enclosing_route_type_for_method(method: Node) -> Option<Node> {
let mut p = method.parent();
while let Some(n) = p {
let k = n.kind();
if k == "class_declaration" || k == "record_declaration" {
return Some(n);
}
p = n.parent();
}
None
}
/// Appends `v` to `verbs` unless an equal entry is already present, keeping
/// insertion order.
fn csharp_push_http_verb(verbs: &mut Vec<String>, v: &str) {
    let already_present = verbs.iter().map(String::as_str).any(|known| known == v);
    if already_present {
        return;
    }
    verbs.push(v.to_owned());
}
/// Reads the routing attributes attached directly to a C# method node.
///
/// Returns `(verbs, http_template, route_attr)`:
/// * `verbs` – distinct HTTP verbs, in declaration order, from `[HttpGet]`,
///   `[HttpPost]`, `[HttpPut]`, `[HttpDelete]`, `[HttpPatch]`, `[HttpHead]`
///   and `[HttpOptions]`;
/// * `http_template` – normalised template from the last verb attribute that
///   carries a string argument;
/// * `route_attr` – normalised template from the last `[Route("...")]`
///   attribute that carries a string argument.
///
/// Attribute names are matched on their simple name, with an optional
/// `Attribute` suffix (`[HttpGetAttribute]` is the same as `[HttpGet]`).
fn csharp_parse_method_api_attributes(
    method: Node,
    source: &str,
) -> (Vec<String>, Option<String>, Option<String>) {
    const VERB_ATTRS: &[(&str, &str)] = &[
        ("HttpGet", "GET"),
        ("HttpPost", "POST"),
        ("HttpPut", "PUT"),
        ("HttpDelete", "DELETE"),
        ("HttpPatch", "PATCH"),
        ("HttpHead", "HEAD"),
        ("HttpOptions", "OPTIONS"),
    ];
    let mut verbs: Vec<String> = Vec::new();
    let mut http_template: Option<String> = None;
    let mut route_attr: Option<String> = None;
    for i in 0..method.child_count() {
        let Some(list) = method.child(i) else {
            continue;
        };
        if list.kind() != "attribute_list" {
            continue;
        }
        for j in 0..list.child_count() {
            let Some(attr) = list.child(j) else {
                continue;
            };
            if attr.kind() != "attribute" {
                continue;
            }
            let Some(name_n) = attr.child_by_field_name("name") else {
                continue;
            };
            let Some(full_name) = csharp_attribute_simple_name_from_name_node(name_n, source)
            else {
                continue;
            };
            // `[FooAttribute]` and `[Foo]` name the same attribute class.
            let simple = full_name
                .strip_suffix("Attribute")
                .filter(|s| !s.is_empty())
                .unwrap_or(full_name.as_str());
            if simple == "Route" {
                if let Some(raw) = csharp_first_string_in_attribute(attr, source) {
                    route_attr = csharp_route_path_from_str(&raw);
                }
                continue;
            }
            if let Some((_, verb)) = VERB_ATTRS.iter().find(|(name, _)| *name == simple) {
                csharp_push_http_verb(&mut verbs, verb);
                if let Some(raw) = csharp_first_string_in_attribute(attr, source) {
                    http_template = csharp_route_path_from_str(&raw);
                }
            }
        }
    }
    (verbs, http_template, route_attr)
}
/// Joins a class-level route and a method-level template into one path that
/// starts with `/`.
///
/// * The method segment is `method_template` with surrounding whitespace and
///   slashes removed; when that leaves nothing, `method_name` is used instead.
/// * When `class_route` is absent or blank (after trimming whitespace and
///   slashes) the result is `/<method segment>`, otherwise
///   `/<class route>/<method segment>`.
fn csharp_join_route_parts(
    class_route: Option<&str>,
    method_template: Option<&str>,
    method_name: &str,
) -> String {
    /// Trims whitespace, then leading and trailing slashes.
    fn bare(segment: &str) -> &str {
        segment.trim().trim_matches('/')
    }
    let tail = match method_template.map(bare) {
        // Cleaned a second time so whitespace exposed by slash removal is
        // dropped as well.
        Some(template) if !template.is_empty() => bare(template),
        _ => bare(method_name),
    };
    let base = class_route.map(bare).unwrap_or("");
    if base.is_empty() {
        format!("/{tail}")
    } else {
        format!("/{base}/{tail}")
    }
}
/// Lists the HTTP endpoints declared through routing attributes on C#
/// methods.
///
/// Each entry is `(http_methods, path_template, method_name)`. Methods with
/// no verb attribute and no route template are skipped; methods with a
/// template but no verb attribute are reported with the single verb `"ANY"`.
/// The path is built by `csharp_join_route_parts` from the enclosing type's
/// route (if any) and the method's verb template, falling back to its
/// `[Route]` template.
fn extract_csharp_api_endpoints_from_tree(tree: &Tree, source: &str) -> Vec<(Vec<String>, String, String)> {
    let mut endpoints: Vec<(Vec<String>, String, String)> = Vec::new();
    walk_tree(tree.root_node(), |node| {
        if node.kind() != "method_declaration" {
            return;
        }
        let Some(method_name) = node
            .child_by_field_name("name")
            .and_then(|n| csharp_node_text(n, source))
            .or_else(|| identifier_text_from_children(node, source))
        else {
            return;
        };
        let (mut verbs, http_template, route_attr) =
            csharp_parse_method_api_attributes(node, source);
        if verbs.is_empty() {
            if http_template.is_none() && route_attr.is_none() {
                return;
            }
            verbs.push("ANY".to_string());
        }
        let class_route = csharp_enclosing_route_type_for_method(node)
            .and_then(|type_node| csharp_type_level_route_template(type_node, source));
        let method_segment = http_template.or(route_attr);
        let path_template = csharp_join_route_parts(
            class_route.as_deref(),
            method_segment.as_deref(),
            &method_name,
        );
        endpoints.push((verbs, path_template, method_name));
    });
    endpoints
}
/// Builds a lookup from simple name (the text after the last `.`) to the full
/// import path, for every entry returned by `extract_internal_java_imports`.
/// Later imports with the same simple name replace earlier ones.
fn build_internal_import_map(source: &str) -> HashMap<String, String> {
    extract_internal_java_imports(source)
        .into_iter()
        .filter_map(|fqn| {
            fqn.rsplit('.')
                .next()
                .map(|simple| (simple.to_string(), fqn.clone()))
        })
        .collect()
}
/// Maps local variable names declared inside `method_node` to a best-effort
/// fully qualified type name.
///
/// Each `local_variable_declaration` is parsed textually: the text before the
/// first `=` is split on whitespace, leading `final` modifiers and
/// annotations (`@...`) are skipped, and the next two tokens are taken as the
/// type and the variable name. Generic arguments and package qualifiers are
/// stripped from the type. Declarations using `var` are ignored because they
/// carry no type name.
///
/// The simple type name is qualified, in order of preference, by:
/// 1. an explicit entry in `import_map`;
/// 2. a wildcard import (`x.y.*`) found in `import_map`;
/// 3. the current `package`;
/// 4. nothing (the simple name is used as is).
///
/// NOTE(review): generic types written with spaces (`Map<String, Integer> m`)
/// are still mis-tokenised by the whitespace split.
fn collect_local_var_types_for_method(
    method_node: Node,
    source: &str,
    package: Option<&str>,
    import_map: &HashMap<String, String>,
) -> HashMap<String, String> {
    let mut vars: HashMap<String, String> = HashMap::new();
    walk_tree(method_node, |node| {
        if node.kind() != "local_variable_declaration" {
            return;
        }
        // `get` rejects out-of-range and non-boundary ranges instead of panicking.
        let Some(stmt) = source.get(node.start_byte()..node.end_byte()) else {
            return;
        };
        let before_eq = stmt.split('=').next().unwrap_or("").trim();
        let mut tokens = before_eq
            .split_whitespace()
            .filter(|tok| *tok != "final" && !tok.starts_with('@'));
        let (Some(type_part), Some(var_part)) = (tokens.next(), tokens.next()) else {
            return;
        };
        let type_simple = type_part
            .split('<')
            .next()
            .unwrap_or(type_part)
            .rsplit('.')
            .next()
            .unwrap_or(type_part)
            .trim();
        let var_name = var_part
            .trim_end_matches(';')
            .trim_end_matches(',')
            .trim();
        if var_name.is_empty() || type_simple.is_empty() || type_simple == "var" {
            return;
        }
        let fqn = if let Some(import_fqn) = import_map.get(type_simple) {
            import_fqn.clone()
        } else if let Some(wildcard) = import_map.values().find(|v| v.ends_with(".*")) {
            let base = wildcard.trim_end_matches(".*");
            format!("{base}.{type_simple}")
        } else if let Some(pkg) = package {
            format!("{pkg}.{type_simple}")
        } else {
            type_simple.to_string()
        };
        vars.insert(var_name.to_string(), fqn);
    });
    vars
}
/// Appends the text of every `identifier` node under `node` (as visited by
/// `walk_tree`) to `out`. Nodes with an empty or out-of-range byte span are
/// skipped.
fn collect_identifiers(node: Node, source: &str, out: &mut Vec<String>) {
    walk_tree(node, |candidate| {
        if candidate.kind() != "identifier" {
            return;
        }
        let (start, end) = (candidate.start_byte(), candidate.end_byte());
        if start >= end || end > source.len() {
            return;
        }
        out.push(source[start..end].to_string());
    });
}
fn extract_java_calls(
tree: &Tree,
source: &str,
package: Option<&str>,
) -> Vec<(String, String)> {
let mut calls: Vec<(String, String)> = Vec::new();
let root = tree.root_node();
let import_map = build_internal_import_map(source);
walk_tree(root, |node| {
if node.kind() != "method_declaration" {
return;
}
let method_name = match identifier_text_from_children(node, source) {
Some(name) => name,
None => return,
};
let mut parent = node.parent();
let mut class_fqn: Option<String> = None;
while let Some(p) = parent {
let pk = p.kind();
if pk == "class_declaration" || pk == "interface_declaration" {
if let Some(class_name) = identifier_text_from_children(p, source) {
let full = if let Some(pkg) = package {
format!("{pkg}.{class_name}")
} else {
class_name
};
class_fqn = Some(full);
}
break;
}
parent = p.parent();
}
let caller_fqn = if let Some(ref cls) = class_fqn {
format!("{cls}.{}", method_name)
} else if let Some(pkg) = package {
format!("{pkg}.{}", method_name)
} else {
method_name.clone()
};
let local_var_types =
collect_local_var_types_for_method(node, source, package, &import_map);
walk_tree(node, |child| {
if child.kind() != "method_invocation" {
return;
}
let callee_name = if let Some(name_node) = child.child_by_field_name("name") {
let start = name_node.start_byte() as usize;
let end = name_node.end_byte() as usize;
if end <= source.len() && start < end {
source[start..end].to_string()
} else {
match identifier_text_from_children(child, source) {
Some(name) => name,
None => return,
}
}
} else {
match identifier_text_from_children(child, source) {
Some(name) => name,
None => return,
}
};
let mut receiver_type_fqn: Option<String> = None;
if let Some(object_node) = child.child_by_field_name("object") {
let mut recv_idents: Vec<String> = Vec::new();
collect_identifiers(object_node, source, &mut recv_idents);
for ident in &recv_idents {
if let Some(ty) = local_var_types.get(ident) {
receiver_type_fqn = Some(ty.clone());
break;
}
}
if receiver_type_fqn.is_none() {
if let Some(first_ident) = recv_idents.first() {
if let Some(import_fqn) = import_map.get(first_ident) {
receiver_type_fqn = Some(import_fqn.clone());
} else if let Some(pkg_prefix) = import_map
.values()
.find(|v| v.ends_with(".*"))
{
let base = pkg_prefix.trim_end_matches(".*");
receiver_type_fqn =
Some(format!("{base}.{first_ident}"));
} else if let Some(pkg) = package {
receiver_type_fqn =
Some(format!("{pkg}.{first_ident}"));
}
}
}
}
let callee_fqn = if let Some(ref recv_ty) = receiver_type_fqn {
format!("{recv_ty}.{}", callee_name)
} else if let Some(ref cls) = class_fqn {
format!("{cls}.{}", callee_name)
} else if let Some(pkg) = package {
format!("{pkg}.{}", callee_name)
} else {
callee_name.clone()
};
calls.push((caller_fqn.clone(), callee_fqn));
});
});
calls
}
/// Returns the source text covered by `n`, with the end clamped to
/// `source.len()`. Yields `None` when the resulting span is empty.
fn node_text_slice(n: Node, source: &str) -> Option<String> {
    let start = n.start_byte();
    let end = n.end_byte().min(source.len());
    if start < end {
        Some(source[start..end].to_string())
    } else {
        None
    }
}
/// Builds a file-scoped identifier of the form `<file_path>::<logical_name>`.
fn non_java_file_scoped_fqn(file_path: &str, logical_name: &str) -> String {
    [file_path, logical_name].join("::")
}
/// Splits a logical name into its last segment and the number of separators
/// it contains. Rust names are separated by `::`, all other languages by `.`.
fn non_java_short_name_and_depth(language: LanguageId, logical: &str) -> (String, usize) {
    let separator = match language {
        LanguageId::Rust => "::",
        _ => ".",
    };
    let short = logical.rsplit(separator).next().unwrap_or(logical);
    (short.to_string(), logical.matches(separator).count())
}
fn rust_inside_impl(node: Node) -> bool {
let mut cur = node.parent();
while let Some(p) = cur {
if p.kind() == "impl_item" {
return true;
}
cur = p.parent();
}
false
}
fn rust_enclosing_mod_prefixes(fn_node: Node, source: &str) -> Vec<String> {
let mut prefixes = Vec::new();
let mut cur = fn_node.parent();
while let Some(p) = cur {
if p.kind() == "mod_item" {
if let Some(name_n) = p.child_by_field_name("name") {
if let Some(name) = node_text_slice(name_n, source) {
let name = name.trim();
if !name.is_empty() {
prefixes.insert(0, name.to_string());
}
}
}
}
cur = p.parent();
}
prefixes
}
/// Returns the function's name prefixed with its enclosing inline modules,
/// joined by `::` (for example `outer::inner::name`). Yields `None` when
/// `rust_function_name` cannot determine the name.
fn rust_function_logical_name(fn_node: Node, source: &str) -> Option<String> {
    let name = rust_function_name(fn_node, source)?;
    let mut segments = rust_enclosing_mod_prefixes(fn_node, source);
    segments.push(name);
    Some(segments.join("::"))
}
fn rust_innermost_enclosing_function(call: Node) -> Option<Node> {
let mut cur = call.parent();
while let Some(p) = cur {
if p.kind() == "function_item" && !rust_inside_impl(p) {
return Some(p);
}
cur = p.parent();
}
None
}
/// Produces one `FunctionSymbol` per free function (a `function_item` outside
/// any `impl`) in a Rust file.
///
/// Symbols are keyed by `<file_path>::<logical name>`; only the first
/// function with a given key is kept. Type, parameter and modifier
/// information is left empty.
fn extract_rust_graph_symbols(tree: &Tree, source: &str, file_path: &str) -> Vec<FunctionSymbol> {
    let mut symbols = Vec::new();
    let mut known_fqns = HashSet::new();
    walk_tree(tree.root_node(), |node| {
        let is_free_function = node.kind() == "function_item" && !rust_inside_impl(node);
        if !is_free_function {
            return;
        }
        let Some(logical) = rust_function_logical_name(node, source) else {
            return;
        };
        let fqn = non_java_file_scoped_fqn(file_path, &logical);
        if known_fqns.contains(&fqn) {
            return;
        }
        known_fqns.insert(fqn.clone());
        symbols.push(FunctionSymbol {
            name: rust_function_name(node, source).unwrap_or_default(),
            fqn,
            class_fqn: None,
            return_type: None,
            param_types: Vec::new(),
            param_count: 0,
            modifiers: Vec::new(),
            is_pointer_receiver: None,
        });
    });
    symbols
}
/// Collects `(caller_fqn, callee_fqn)` pairs for calls between free functions
/// of the same Rust file.
///
/// Only `call_expression`s whose callee is a plain identifier present in
/// `name_to_fqn` are reported, and only when the call sits inside a free
/// function (see `rust_innermost_enclosing_function`).
fn extract_rust_intrafile_calls(
    tree: &Tree,
    source: &str,
    file_path: &str,
    name_to_fqn: &HashMap<String, String>,
) -> Vec<(String, String)> {
    let mut calls = Vec::new();
    walk_tree(tree.root_node(), |call| {
        if call.kind() != "call_expression" {
            return;
        }
        let Some(caller_logical) = rust_innermost_enclosing_function(call)
            .and_then(|enclosing| rust_function_logical_name(enclosing, source))
        else {
            return;
        };
        let Some(callee_node) = call
            .child_by_field_name("function")
            .filter(|f| f.kind() == "identifier")
        else {
            return;
        };
        let Some(callee_text) = node_text_slice(callee_node, source) else {
            return;
        };
        if let Some(callee_fqn) = name_to_fqn.get(callee_text.trim()) {
            let caller_fqn = non_java_file_scoped_fqn(file_path, &caller_logical);
            calls.push((caller_fqn, callee_fqn.clone()));
        }
    });
    calls
}
/// Splits the text of a use-path node into its `::`-separated segments.
///
/// Everything from the first `{` onwards is ignored, a trailing `;` is
/// dropped, and glob (`*`) segments are removed. A rename such as
/// `b as c` contributes only the original name (`b`), so that the segment
/// list names the imported item rather than its local alias.
///
/// Returns `None` when the node has no text or no usable segment remains.
fn rust_use_path_segments_from_node(node: Node, source: &str) -> Option<Vec<String>> {
    let text = node_text_slice(node, source)?;
    let head = text.split('{').next()?.trim().trim_end_matches(';').trim();
    if head.is_empty() {
        return None;
    }
    let segs: Vec<String> = head
        .split("::")
        // Keep only the first word of a segment: this trims whitespace and
        // discards a trailing ` as alias`.
        .filter_map(|seg| seg.split_whitespace().next())
        .filter(|seg| *seg != "*" && *seg != "as")
        .map(str::to_string)
        .collect();
    if segs.is_empty() {
        None
    } else {
        Some(segs)
    }
}
/// Returns the distinct segment lists found in the `use` declarations of a
/// Rust file, in first-seen order.
///
/// Inside each `use_declaration`, every `scoped_identifier` and `use_tree`
/// node is converted with `rust_use_path_segments_from_node`.
fn extract_rust_use_paths(tree: &Tree, source: &str) -> Vec<Vec<String>> {
    let mut paths: Vec<Vec<String>> = Vec::new();
    walk_tree(tree.root_node(), |declaration| {
        if declaration.kind() != "use_declaration" {
            return;
        }
        walk_tree(declaration, |part| {
            if !matches!(part.kind(), "scoped_identifier" | "use_tree") {
                return;
            }
            match rust_use_path_segments_from_node(part, source) {
                Some(segments) if !segments.is_empty() && !paths.contains(&segments) => {
                    paths.push(segments);
                }
                _ => {}
            }
        });
    });
    paths
}
/// Converts a Rust file path into module path segments.
///
/// Backslashes are treated as `/` and a leading `src/` is dropped. `lib.rs`
/// and `main.rs` map to the empty (crate root) path, `a/b/mod.rs` maps to
/// `["a", "b"]`, `a/b.rs` maps to `["a", "b"]`, and anything that is not a
/// `.rs` file maps to the empty path.
fn rust_file_to_module_path(file_path: &str) -> Vec<String> {
    let unified = file_path.replace('\\', "/");
    let relative = unified.strip_prefix("src/").unwrap_or(unified.as_str());
    if matches!(relative, "lib.rs" | "main.rs") {
        return Vec::new();
    }
    let module_stem = relative
        .strip_suffix("/mod.rs")
        .or_else(|| relative.strip_suffix(".rs"));
    match module_stem {
        Some(stem) if !stem.is_empty() => stem.split('/').map(String::from).collect(),
        _ => Vec::new(),
    }
}
/// Lists the file paths that could define the module named by `segments`:
/// `src/lib.rs` and `src/main.rs` for the empty path, otherwise
/// `src/<a/b>.rs` followed by `src/<a/b>/mod.rs`.
fn rust_module_path_to_file_candidates(segments: &[String]) -> Vec<String> {
    match segments {
        [] => vec![String::from("src/lib.rs"), String::from("src/main.rs")],
        _ => {
            let relative = segments.join("/");
            vec![
                format!("src/{relative}.rs"),
                format!("src/{relative}/mod.rs"),
            ]
        }
    }
}
/// Resolves the segments of a `use` path to one of `known_paths`.
///
/// The starting module is derived from `current_file` and adjusted by the
/// leading qualifier: `crate` resets to the crate root, `self` keeps the
/// current module, and every leading `super` (including chains such as
/// `super::super` and `self::super`) moves one module up.
///
/// The longest prefix of the remaining segments that maps to a known file
/// wins. Paths anchored by `crate`/`self`/`super` may resolve with zero
/// remaining segments (to the anchor module itself). Bare paths such as
/// `serde::Serialize` must match at least one segment, so an unresolvable
/// external crate yields `None` instead of the current file.
fn resolve_rust_use_to_known_file(
    use_segments: &[String],
    current_file: &str,
    known_paths: &HashSet<String>,
) -> Option<String> {
    if use_segments.is_empty() {
        return None;
    }
    let mut mod_path = rust_file_to_module_path(current_file);
    let (mut consumed, anchored) = match use_segments[0].as_str() {
        "crate" => {
            mod_path.clear();
            (1usize, true)
        }
        "self" => (1, true),
        "super" => (0, true),
        _ => (0, false),
    };
    // Each `super` climbs one level; popping at the crate root is a no-op.
    while use_segments.get(consumed).map(String::as_str) == Some("super") {
        mod_path.pop();
        consumed += 1;
    }
    let rest = &use_segments[consumed..];
    let min_len = if anchored { 0 } else { 1 };
    for len in (min_len..=rest.len()).rev() {
        let mut target = mod_path.clone();
        target.extend(rest[..len].iter().cloned());
        for cand in rust_module_path_to_file_candidates(&target) {
            if known_paths.contains(&cand) {
                return Some(cand);
            }
        }
    }
    None
}
/// Dispatches function-symbol extraction by language: Rust, Python and
/// JavaScript/TypeScript/TSX each have a dedicated extractor; every other
/// language yields no symbols.
fn extract_non_java_function_symbols(
    file: &ParsedFile,
    source: &str,
    file_path: &str,
) -> Vec<FunctionSymbol> {
    let tree = &file.tree;
    let language = file.language;
    if matches!(language, LanguageId::Rust) {
        return extract_rust_graph_symbols(tree, source, file_path);
    }
    if matches!(language, LanguageId::Python) {
        return extract_python_graph_symbols(tree, source, file_path);
    }
    if matches!(
        language,
        LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx
    ) {
        return extract_js_ts_graph_symbols(tree, source, file_path, language);
    }
    Vec::new()
}
/// Reports every `ERROR` or missing node in the tree as
/// `(line, column, snippet)`.
///
/// Line and column are 1-based. The snippet is at most the first 120
/// characters of the node's text with newlines replaced by spaces; it is
/// empty when the node covers no text.
fn extract_python_parse_warnings(tree: &Tree, source: &str) -> Vec<(usize, usize, String)> {
    let mut warnings = Vec::new();
    walk_tree(tree.root_node(), |node| {
        let is_problem = node.kind() == "ERROR" || node.is_missing();
        if !is_problem {
            return;
        }
        let start = node.start_byte();
        let end = node.end_byte().min(source.len());
        let mut snippet = String::new();
        if start < end {
            snippet = source[start..end]
                .chars()
                .take(120)
                .collect::<String>()
                .replace('\n', " ");
        }
        let at = node.start_position();
        warnings.push((at.row + 1, at.column + 1, snippet));
    });
    warnings
}
/// Parse warnings for JavaScript/TypeScript trees. Delegates to
/// `extract_python_parse_warnings`, which only inspects `ERROR` and missing
/// nodes.
fn extract_js_ts_parse_warnings(tree: &Tree, source: &str) -> Vec<(usize, usize, String)> {
    let warnings = extract_python_parse_warnings(tree, source);
    warnings
}
fn python_node_inside_class(mut node: Node) -> bool {
while let Some(p) = node.parent() {
if p.kind() == "class_definition" {
return true;
}
node = p;
}
false
}
/// Returns the trimmed text of the node's `name` field, or `None` when the
/// field is missing or blank.
fn python_function_definition_name(fn_node: Node, source: &str) -> Option<String> {
    let name_node = fn_node.child_by_field_name("name")?;
    let text = node_text_slice(name_node, source)?;
    let trimmed = text.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
fn python_enclosing_function_prefixes(fn_node: Node, source: &str) -> Vec<String> {
let mut prefixes = Vec::new();
let mut cur = fn_node.parent();
while let Some(p) = cur {
if p.kind() == "function_definition" && !python_node_inside_class(p) {
if let Some(n) = python_function_definition_name(p, source) {
prefixes.insert(0, n);
}
}
cur = p.parent();
}
prefixes
}
/// Returns the function's name prefixed with its enclosing functions, joined
/// by `.` (for example `outer.inner`). Yields `None` when the function has no
/// readable name.
fn python_function_logical_name(fn_node: Node, source: &str) -> Option<String> {
    let name = python_function_definition_name(fn_node, source)?;
    let mut segments = python_enclosing_function_prefixes(fn_node, source);
    segments.push(name);
    Some(segments.join("."))
}
/// Produces one `FunctionSymbol` per Python `function_definition` that is
/// not nested inside a class.
///
/// Symbols are keyed by `<file_path>::<logical name>`; only the first
/// function with a given key is kept. Type, parameter and modifier
/// information is left empty.
fn extract_python_graph_symbols(tree: &Tree, source: &str, file_path: &str) -> Vec<FunctionSymbol> {
    let mut symbols = Vec::new();
    let mut known_fqns = HashSet::new();
    walk_tree(tree.root_node(), |node| {
        let is_plain_function =
            node.kind() == "function_definition" && !python_node_inside_class(node);
        if !is_plain_function {
            return;
        }
        let Some(logical) = python_function_logical_name(node, source) else {
            return;
        };
        let fqn = non_java_file_scoped_fqn(file_path, &logical);
        if known_fqns.contains(&fqn) {
            return;
        }
        known_fqns.insert(fqn.clone());
        symbols.push(FunctionSymbol {
            name: python_function_definition_name(node, source).unwrap_or_default(),
            fqn,
            class_fqn: None,
            return_type: None,
            param_types: Vec::new(),
            param_count: 0,
            modifiers: Vec::new(),
            is_pointer_receiver: None,
        });
    });
    symbols
}
fn python_innermost_enclosing_function(call: Node) -> Option<Node> {
let mut cur = call.parent();
while let Some(p) = cur {
if p.kind() == "function_definition" && !python_node_inside_class(p) {
return Some(p);
}
cur = p.parent();
}
None
}
/// Collects `(caller_fqn, callee_fqn)` pairs for calls between non-class
/// functions of the same Python file.
///
/// Only `call` nodes whose callee is a plain identifier present in
/// `name_to_fqn` are reported, and only when the call sits inside a function
/// found by `python_innermost_enclosing_function`.
fn extract_python_intrafile_calls(
    tree: &Tree,
    source: &str,
    file_path: &str,
    name_to_fqn: &HashMap<String, String>,
) -> Vec<(String, String)> {
    let mut calls = Vec::new();
    walk_tree(tree.root_node(), |call| {
        if call.kind() != "call" {
            return;
        }
        let Some(caller_logical) = python_innermost_enclosing_function(call)
            .and_then(|enclosing| python_function_logical_name(enclosing, source))
        else {
            return;
        };
        let Some(callee_node) = call
            .child_by_field_name("function")
            .filter(|f| f.kind() == "identifier")
        else {
            return;
        };
        let Some(callee_text) = node_text_slice(callee_node, source) else {
            return;
        };
        if let Some(callee_fqn) = name_to_fqn.get(callee_text.trim()) {
            let caller_fqn = non_java_file_scoped_fqn(file_path, &caller_logical);
            calls.push((caller_fqn, callee_fqn.clone()));
        }
    });
    calls
}
/// Returns the sorted, de-duplicated module names imported by a Python file.
///
/// * `from X import ...` contributes `X`, unless it is relative (starts with
///   `.`) or blank.
/// * `import a.b, c as d` contributes each dotted name (`a.b`, `c`); for
///   aliased imports the original name is used, not the alias.
fn extract_python_import_modules(tree: &Tree, source: &str) -> Vec<String> {
    let mut modules = Vec::new();
    walk_tree(tree.root_node(), |node| {
        let kind = node.kind();
        if kind == "import_from_statement" {
            let Some(module_node) = node.child_by_field_name("module_name") else {
                return;
            };
            let text = node_text_slice(module_node, source).unwrap_or_default();
            let module = text.trim();
            // Relative imports (leading dot) are intentionally skipped.
            if module.is_empty() || module.starts_with('.') {
                return;
            }
            modules.push(module.to_string());
        } else if kind == "import_statement" {
            for i in 0..node.child_count() {
                let Some(child) = node.child(i) else {
                    continue;
                };
                let dotted = match child.kind() {
                    "dotted_name" => Some(child),
                    "aliased_import" => child
                        .child_by_field_name("name")
                        .filter(|name| name.kind() == "dotted_name"),
                    _ => None,
                };
                let Some(text) = dotted.and_then(|name| node_text_slice(name, source)) else {
                    continue;
                };
                let module = text.trim();
                if !module.is_empty() {
                    modules.push(module.to_string());
                }
            }
        }
    });
    modules.sort();
    modules.dedup();
    modules
}
/// Resolves a dotted Python module path to one of `known_paths`.
///
/// A path matches when, after converting backslashes to `/`, it is exactly
/// `<a/b>.py` or `<a/b>/__init__.py`, or ends with one of those preceded by a
/// `/`. Matching on whole path components prevents `os` from resolving to
/// `cosmos.py`.
///
/// Among several matches the shortest path wins; ties are broken
/// lexicographically so the result does not depend on set iteration order.
/// The returned string is the original entry from `known_paths`.
fn resolve_python_import_to_known_file(
    module_path: &str,
    known_paths: &HashSet<String>,
) -> Option<String> {
    let module = module_path.trim();
    if module.is_empty() {
        return None;
    }
    let needle = module.replace('.', "/");
    let module_file = format!("{needle}.py");
    let package_init = format!("{needle}/__init__.py");
    // True when `path` ends with `suffix` at a path-component boundary.
    let ends_at_boundary = |path: &str, suffix: &str| {
        path.strip_suffix(suffix)
            .map_or(false, |prefix| prefix.is_empty() || prefix.ends_with('/'))
    };
    known_paths
        .iter()
        .filter(|candidate| {
            let normalized = candidate.replace('\\', "/");
            ends_at_boundary(&normalized, &module_file)
                || ends_at_boundary(&normalized, &package_init)
        })
        .min_by(|a, b| a.len().cmp(&b.len()).then_with(|| a.cmp(b)))
        .cloned()
}
fn js_inside_class(mut node: Node) -> bool {
while let Some(p) = node.parent() {
if p.kind() == "class_declaration" {
return true;
}
node = p;
}
false
}
/// Returns the trimmed text of the node's `name` field, or `None` when the
/// field is missing or blank.
fn js_function_declaration_name(node: Node, source: &str) -> Option<String> {
    let name_node = node.child_by_field_name("name")?;
    let text = node_text_slice(name_node, source)?;
    let trimmed = text.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
fn js_nested_function_declaration_prefix(fn_node: Node, source: &str, self_name: &str) -> String {
let mut prefixes = Vec::new();
let mut cur = fn_node.parent();
while let Some(p) = cur {
if p.kind() == "function_declaration" && !js_inside_class(p) {
if let Some(n) = js_function_declaration_name(p, source) {
prefixes.insert(0, n);
}
}
cur = p.parent();
}
if prefixes.is_empty() {
self_name.to_string()
} else {
format!("{}.{}", prefixes.join("."), self_name)
}
}
fn js_prefix_from_ancestors_for_expr(expr_node: Node, source: &str, var_name: &str) -> String {
let mut prefixes = Vec::new();
let mut cur = expr_node.parent();
while let Some(p) = cur {
if p.kind() == "function_declaration" && !js_inside_class(p) {
if let Some(n) = js_function_declaration_name(p, source) {
prefixes.insert(0, n);
}
}
cur = p.parent();
}
if prefixes.is_empty() {
var_name.to_string()
} else {
format!("{}.{}", prefixes.join("."), var_name)
}
}
/// Returns the trimmed text of the class node's `name` field, or `None` when
/// the field is missing or blank.
fn js_class_declaration_name_from(class_node: Node, source: &str) -> Option<String> {
    let name_node = class_node.child_by_field_name("name")?;
    let text = node_text_slice(name_node, source)?;
    let trimmed = text.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
fn js_enclosing_class_declaration_name(start: Node, source: &str) -> Option<String> {
let mut cur = start.parent();
while let Some(p) = cur {
if p.kind() == "class_declaration" {
return js_class_declaration_name_from(p, source);
}
cur = p.parent();
}
None
}
/// Returns the trimmed member name of a class member node, taken from its
/// `name` field or, failing that, its `property` field.
///
/// Only `property_identifier`, `identifier` and `private_property_identifier`
/// names are accepted; other name kinds and blank names yield `None`.
fn js_property_name_text(method_node: Node, source: &str) -> Option<String> {
    let name_node = match method_node.child_by_field_name("name") {
        Some(node) => node,
        None => method_node.child_by_field_name("property")?,
    };
    let accepted = matches!(
        name_node.kind(),
        "property_identifier" | "identifier" | "private_property_identifier"
    );
    if !accepted {
        return None;
    }
    let text = node_text_slice(name_node, source)?;
    let trimmed = text.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
/// Reports whether `kind` is one of the node kinds this module treats as a
/// class member: a method definition or any of the field definition kinds.
fn is_js_ts_class_member_fn(kind: &str) -> bool {
    const MEMBER_KINDS: [&str; 5] = [
        "method_definition",
        "field_definition",
        "public_field_definition",
        "private_field_definition",
        "protected_field_definition",
    ];
    MEMBER_KINDS.contains(&kind)
}
/// Strips one pair of matching single or double quotes from a trimmed string
/// literal. Text that is not wrapped in a matching pair is returned trimmed
/// but otherwise unchanged.
fn js_ts_string_inner(spec: &str) -> String {
    let trimmed = spec.trim();
    for quote in ['"', '\''] {
        let unquoted = trimmed
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = unquoted {
            return inner.to_string();
        }
    }
    trimmed.to_string()
}
/// Returns the sorted, de-duplicated module specifiers referenced by the
/// `import` and `export ... from` statements of a JS/TS file.
///
/// Only statements whose `source` field is a `string` node contribute; the
/// surrounding quotes are removed and empty specifiers are dropped.
fn extract_js_ts_import_specifiers(tree: &Tree, source: &str) -> Vec<String> {
    let mut specifiers = Vec::new();
    walk_tree(tree.root_node(), |node| {
        if !matches!(node.kind(), "import_statement" | "export_statement") {
            return;
        }
        let Some(source_node) = node
            .child_by_field_name("source")
            .filter(|s| s.kind() == "string")
        else {
            return;
        };
        let Some(raw) = node_text_slice(source_node, source) else {
            return;
        };
        let specifier = js_ts_string_inner(&raw);
        if !specifier.is_empty() {
            specifiers.push(specifier);
        }
    });
    specifiers.sort();
    specifiers.dedup();
    specifiers
}
/// Lexically normalises `path` into a `/`-separated string without touching
/// the file system.
///
/// * `.` components and platform prefixes are dropped;
/// * `..` removes the preceding normal component;
/// * a `..` with nothing left to remove is kept for relative paths (so
///   `../x` stays `../x` rather than silently becoming `x`) and ignored for
///   rooted paths (the parent of the root is the root);
/// * rooted paths keep their leading `/`.
fn normalized_logical_path(path: &Path) -> String {
    use std::path::Component;
    let mut parts: Vec<String> = Vec::new();
    let mut starts_root = false;
    for component in path.components() {
        match component {
            Component::RootDir => starts_root = true,
            Component::Prefix(_) | Component::CurDir => {}
            Component::Normal(segment) => parts.push(segment.to_string_lossy().into_owned()),
            Component::ParentDir => match parts.last().map(String::as_str) {
                Some(last) if last != ".." => {
                    parts.pop();
                }
                // Cannot climb above the root.
                _ if starts_root => {}
                // Keep the escape so the result still points outside the base.
                _ => parts.push("..".to_string()),
            },
        }
    }
    let joined = parts.join("/");
    if starts_root {
        format!("/{joined}")
    } else {
        joined
    }
}
/// Resolves a JS/TS import specifier to one of `known_paths`.
///
/// * Blank specifiers and specifiers starting with `@` are not resolved.
/// * Relative specifiers (starting with `.`) are joined onto the directory of
///   `current_file`, normalised, and tried as-is, with each of
///   `.ts`/`.tsx`/`.js`/`.jsx` appended, and then as a directory containing
///   `index.<ext>`.
/// * Any other specifier is matched against known source files on whole path
///   components: a file matches when its path, or its path without
///   extension, equals the specifier or ends with `/<specifier>`, or when it
///   is `<specifier>/index.<ext>`. This prevents `react` from resolving to
///   `react-utils.ts`. The shortest match wins; ties are broken
///   lexicographically so the result does not depend on set iteration order.
fn resolve_js_ts_import_to_known_file(
    spec: &str,
    current_file: &str,
    known_paths: &HashSet<String>,
) -> Option<String> {
    const SOURCE_EXTS: [&str; 4] = [".ts", ".tsx", ".js", ".jsx"];
    let spec = spec.trim();
    if spec.is_empty() || spec.starts_with('@') {
        return None;
    }
    if spec.starts_with('.') {
        let dir = Path::new(current_file).parent()?;
        let base = normalized_logical_path(&dir.join(spec)).replace('\\', "/");
        // Exact path first, then with each extension, then as a directory index.
        let direct = std::iter::once("")
            .chain(SOURCE_EXTS)
            .map(|ext| format!("{base}{ext}"));
        let index = SOURCE_EXTS.iter().map(|ext| format!("{base}/index{ext}"));
        return direct.chain(index).find(|cand| known_paths.contains(cand));
    }
    let needle = spec.replace('\\', "/");
    let index_needle = format!("{needle}/index");
    // True when `path` ends with `suffix` at a path-component boundary.
    let ends_at_boundary = |path: &str, suffix: &str| {
        path.strip_suffix(suffix)
            .map_or(false, |prefix| prefix.is_empty() || prefix.ends_with('/'))
    };
    known_paths
        .iter()
        .filter(|candidate| {
            let normalized = candidate.replace('\\', "/");
            let Some(stem) = SOURCE_EXTS
                .iter()
                .find_map(|ext| normalized.strip_suffix(ext))
            else {
                return false;
            };
            ends_at_boundary(&normalized, &needle)
                || ends_at_boundary(stem, &needle)
                || ends_at_boundary(stem, &index_needle)
        })
        .min_by(|a, b| a.len().cmp(&b.len()).then_with(|| a.cmp(b)))
        .cloned()
}
fn extract_js_ts_graph_symbols(
tree: &Tree,
source: &str,
file_path: &str,
language: LanguageId,
) -> Vec<FunctionSymbol> {
let _ = language;
let mut out = Vec::new();
let mut seen = HashSet::new();
let root = tree.root_node();
let mut push = |logical: String, name: String| {
let fqn = non_java_file_scoped_fqn(file_path, &logical);
if seen.insert(fqn.clone()) {
out.push(FunctionSymbol {
name,
fqn,
class_fqn: None,
return_type: None,
param_types: Vec::new(),
param_count: 0,
modifiers: Vec::new(),
is_pointer_receiver: None,
});
}
};
walk_tree(root, |node| {
let kind = node.kind();
if kind == "function_declaration" {
if js_inside_class(node) {
return;
}
let Some(nm) = js_function_declaration_name(node, source) else {
return;
};
let logical = js_nested_function_declaration_prefix(node, source, &nm);
push(logical, nm);
return;
}
if is_js_ts_class_member_fn(kind) {
let Some(meth) = js_property_name_text(node, source) else {
return;
};
let cls = js_enclosing_class_declaration_name(node, source)
.unwrap_or_else(|| "anonymous_class".to_string());
let logical = format!("{cls}.{meth}");
push(logical, meth);
return;
}
if kind == "variable_declarator" {
let Some(val) = node.child_by_field_name("value") else {
return;
};
if !matches!(val.kind(), "arrow_function" | "function_expression") {
return;
}
let Some(name_n) = node.child_by_field_name("name") else {
return;
};
if name_n.kind() != "identifier" {
return;
}
let Some(var_name) = node_text_slice(name_n, source) else {
return;
};
let var_name = var_name.trim().to_string();
if var_name.is_empty() {
return;
}
if js_inside_class(node) {
let cls = js_enclosing_class_declaration_name(node, source)
.unwrap_or_else(|| "anonymous_class".to_string());
let logical = format!("{cls}.{var_name}");
push(logical, var_name);
} else {
let logical = js_prefix_from_ancestors_for_expr(val, source, &var_name);
push(logical, var_name);
}
}
});
out
}
/// Returns the logical name of the innermost named scope containing `call`.
///
/// Ancestors are examined from the inside out:
/// * a `function_declaration` outside any class gives its nesting-chain name
///   (declarations inside a class are skipped);
/// * a class member gives `<Class>.<member>`;
/// * an arrow function or function expression gives the name of the nearest
///   `variable_declarator` at or above it, if there is one; otherwise the
///   search continues outwards.
///
/// Returns `None` when no scope is found, or when the first matching scope
/// has no usable name.
fn js_ts_innermost_enclosing_logical(call: Node, source: &str) -> Option<String> {
    let mut next = call.parent();
    while let Some(scope) = next {
        next = scope.parent();
        let kind = scope.kind();
        if kind == "function_declaration" {
            if js_inside_class(scope) {
                continue;
            }
            let name = js_function_declaration_name(scope, source)?;
            return Some(js_nested_function_declaration_prefix(scope, source, &name));
        }
        if is_js_ts_class_member_fn(kind) {
            let member = js_property_name_text(scope, source)?;
            let class_name = js_enclosing_class_declaration_name(scope, source)
                .unwrap_or_else(|| "anonymous_class".to_string());
            return Some(format!("{class_name}.{member}"));
        }
        if matches!(kind, "arrow_function" | "function_expression") {
            let declarator = std::iter::successors(Some(scope), |n| n.parent())
                .find(|n| n.kind() == "variable_declarator");
            if let Some(declarator) = declarator {
                let name_node = declarator.child_by_field_name("name")?;
                if name_node.kind() != "identifier" {
                    return None;
                }
                let text = node_text_slice(name_node, source)?;
                let var_name = text.trim();
                if var_name.is_empty() {
                    return None;
                }
                return Some(js_prefix_from_ancestors_for_expr(scope, source, var_name));
            }
        }
    }
    None
}
/// Collects `(caller_fqn, callee_fqn)` pairs for calls between functions of one JS/TS file.
///
/// Only `call_expression` nodes whose callee is a bare `identifier` are considered, and only
/// when that identifier is a key of `name_to_fqn`. The caller is the file-scoped FQN of the
/// innermost enclosing logical function; calls without such an enclosing function are ignored.
fn extract_js_ts_intrafile_calls(
    tree: &Tree,
    source: &str,
    file_path: &str,
    language: LanguageId,
    name_to_fqn: &HashMap<String, String>,
) -> Vec<(String, String)> {
    // The language is part of the signature but is not consulted here.
    let _ = language;
    let mut edges = Vec::new();
    walk_tree(tree.root_node(), |candidate| {
        if candidate.kind() != "call_expression" {
            return;
        }
        let resolve = || -> Option<(String, String)> {
            let logical = js_ts_innermost_enclosing_logical(candidate, source)?;
            let caller = non_java_file_scoped_fqn(file_path, &logical);
            let target = candidate
                .child_by_field_name("function")
                .filter(|f| f.kind() == "identifier")?;
            let text = node_text_slice(target, source)?;
            let callee = name_to_fqn.get(text.trim())?;
            Some((caller, callee.clone()))
        };
        edges.extend(resolve());
    });
    edges
}
/// Scans Java source line by line for Spring MVC mapping annotations and returns one
/// `(http_methods, path_template, handler_method_name)` tuple per annotated handler.
///
/// This is a textual heuristic, not a parse:
/// * an `@RequestMapping` line that does not contain the text `method` sets the class-level
///   base path;
/// * `@GetMapping` / `@PostMapping` / `@PutMapping` / `@DeleteMapping` / `@PatchMapping`
///   record the verb and the annotation path (`/` when no path literal is present);
/// * an `@RequestMapping` line that mentions `method` records every `RequestMethod.*` verb
///   listed on the line (GET, POST, PUT, DELETE, PATCH), or `ANY` when none is recognised,
///   a path is pending and no verb has been recorded yet;
/// * the next line starting with `public ` / `private ` / `protected ` and containing `(`
///   is taken as the handler; its name is the last whitespace-separated token before `(`.
///
/// The handler path is joined onto the class base path when one is known; without a base
/// path it is the pending path, or `/<method_name>` if there is none.
fn extract_java_spring_endpoints(source: &str) -> Vec<(Vec<String>, String, String)> {
    const SHORTCUT_MAPPINGS: [(&str, &str); 5] = [
        ("@GetMapping", "GET"),
        ("@PostMapping", "POST"),
        ("@PutMapping", "PUT"),
        ("@DeleteMapping", "DELETE"),
        ("@PatchMapping", "PATCH"),
    ];
    const REQUEST_METHODS: [(&str, &str); 5] = [
        ("RequestMethod.GET", "GET"),
        ("RequestMethod.POST", "POST"),
        ("RequestMethod.PUT", "PUT"),
        ("RequestMethod.DELETE", "DELETE"),
        ("RequestMethod.PATCH", "PATCH"),
    ];

    let mut endpoints: Vec<(Vec<String>, String, String)> = Vec::new();
    let mut class_base_path: Option<String> = None;
    let mut pending_methods: Vec<String> = Vec::new();
    let mut pending_path: Option<String> = None;

    for line in source.lines() {
        let trimmed = line.trim();

        // `@RequestMapping` without a `method` attribute is treated as the class-level mapping.
        if trimmed.starts_with("@RequestMapping") && !trimmed.contains("method") {
            if let Some(path) = extract_java_annotation_path(trimmed) {
                class_base_path = Some(path);
            }
            continue;
        }
        if trimmed.starts_with("@RestController") || trimmed.starts_with("@Controller") {
            continue;
        }

        if trimmed.starts_with('@') {
            for (attr, verb) in SHORTCUT_MAPPINGS {
                if trimmed.starts_with(attr) {
                    pending_methods.push(verb.to_string());
                    pending_path = Some(
                        extract_java_annotation_path(trimmed).unwrap_or_else(|| "/".to_string()),
                    );
                }
            }
            if trimmed.starts_with("@RequestMapping") {
                if let Some(path) = extract_java_annotation_path(trimmed) {
                    pending_path = Some(path);
                }
                // Record every verb listed on the line (e.g. `method = {GET, POST}`),
                // including PATCH, instead of only the first match.
                pending_methods.extend(
                    REQUEST_METHODS
                        .iter()
                        .filter(|&&(marker, _)| trimmed.contains(marker))
                        .map(|&(_, verb)| verb.to_string()),
                );
                if pending_methods.is_empty() && pending_path.is_some() {
                    pending_methods.push("ANY".to_string());
                }
            }
            continue;
        }

        let has_visibility = trimmed.starts_with("public ")
            || trimmed.starts_with("private ")
            || trimmed.starts_with("protected ");
        if !has_visibility || pending_methods.is_empty() {
            continue;
        }
        let Some((before_paren, _)) = trimmed.split_once('(') else {
            continue;
        };
        let Some(method_name) = before_paren.split_whitespace().last().map(str::to_string) else {
            continue;
        };

        let path_template = match (&class_base_path, pending_path.take()) {
            (Some(base), method_path) => {
                let method_path = method_path.unwrap_or_else(|| "/".to_string());
                let base = base.trim_end_matches('/');
                if method_path.starts_with('/') {
                    format!("{base}{method_path}")
                } else {
                    format!("{base}/{method_path}")
                }
            }
            (None, Some(method_path)) => method_path,
            (None, None) => format!("/{method_name}"),
        };
        // `take` leaves `pending_methods` empty for the next handler.
        endpoints.push((std::mem::take(&mut pending_methods), path_template, method_name));
    }
    endpoints
}
/// Returns the contents of the first double-quoted literal on an annotation line.
///
/// Yields `None` when the line has no complete `"..."` pair or when the literal is empty.
fn extract_java_annotation_path(attr_line: &str) -> Option<String> {
    let (_, after_open) = attr_line.split_once('"')?;
    let (literal, _) = after_open.split_once('"')?;
    (!literal.is_empty()).then(|| literal.to_string())
}
fn extract_java_used_classes(
tree: &Tree,
source: &str,
package: Option<&str>,
) -> Vec<(String, String)> {
let mut uses: Vec<(String, String)> = Vec::new();
let root = tree.root_node();
let import_map = build_internal_import_map(source);
walk_tree(root, |node| {
if node.kind() != "method_declaration" {
return;
}
let method_name = match identifier_text_from_children(node, source) {
Some(name) => name,
None => return,
};
let mut parent = node.parent();
let mut class_fqn: Option<String> = None;
while let Some(p) = parent {
let pk = p.kind();
if pk == "class_declaration" || pk == "interface_declaration" {
if let Some(class_name) = identifier_text_from_children(p, source) {
let full = if let Some(pkg) = package {
format!("{pkg}.{class_name}")
} else {
class_name
};
class_fqn = Some(full);
}
break;
}
parent = p.parent();
}
let func_fqn = if let Some(ref cls) = class_fqn {
format!("{cls}.{}", method_name)
} else if let Some(pkg) = package {
format!("{pkg}.{}", method_name)
} else {
method_name.clone()
};
walk_tree(node, |child| {
let kind = child.kind();
if kind == "type_identifier" || kind == "object_creation_expression" {
let type_name = if kind == "object_creation_expression" {
child.child_by_field_name("type")
.and_then(|t| {
let start = t.start_byte() as usize;
let end = t.end_byte() as usize;
if end <= source.len() && start < end {
Some(source[start..end].to_string())
} else {
None
}
})
} else {
let start = child.start_byte() as usize;
let end = child.end_byte() as usize;
if end <= source.len() && start < end {
Some(source[start..end].to_string())
} else {
None
}
};
if let Some(type_name) = type_name {
if is_java_primitive_or_builtin(&type_name) {
return;
}
let used_class_fqn = if let Some(fqn) = import_map.get(&type_name) {
fqn.clone()
} else if let Some(pkg) = package {
format!("{pkg}.{type_name}")
} else {
type_name
};
uses.push((func_fqn.clone(), used_class_fqn));
}
}
});
});
uses
}
/// Reports whether `name` is one of the Java primitives, boxed types or common JDK
/// collection types that are excluded from class-usage edges.
fn is_java_primitive_or_builtin(name: &str) -> bool {
    const IGNORED_TYPES: &[&str] = &[
        // primitives
        "int", "long", "short", "byte", "float", "double", "boolean", "char", "void",
        // boxed and core types
        "String", "Integer", "Long", "Short", "Byte", "Float", "Double", "Boolean",
        "Character", "Object", "Class", "Void",
        // common collections
        "List", "ArrayList", "Map", "HashMap", "Set", "HashSet",
        "Optional", "Collection", "Iterator",
    ];
    IGNORED_TYPES.iter().any(|candidate| *candidate == name)
}
/// Edge lists extracted from a single Go file, as produced by `collect_go_call_graph_edges`.
/// Each entry is a `(from_fqn, to_fqn)` pair.
struct GoCallGraphEdges {
/// `(caller, callee)` pairs from ordinary calls plus calls launched via `go` statements.
calls_function: Vec<(String, String)>,
/// `(function, type)` pairs from `extract_go_used_types`.
uses_class: Vec<(String, String)>,
/// `(struct, embedded type)` pairs from `extract_go_embedding`.
class_uses_class: Vec<(String, String)>,
}
/// Runs every Go edge extractor over one parsed file and bundles the results.
///
/// Regular call edges come first in `calls_function`, followed by the edges found in
/// `go` statements.
fn collect_go_call_graph_edges(
    tree: &Tree,
    source: &str,
    package_name: Option<&str>,
) -> GoCallGraphEdges {
    let direct_calls = extract_go_calls(tree, source, package_name);
    let goroutine_calls = extract_go_goroutine_calls(tree, source, package_name);
    let uses_class = extract_go_used_types(tree, source, package_name);
    let class_uses_class = extract_go_embedding(tree, source, package_name);
    GoCallGraphEdges {
        calls_function: direct_calls.into_iter().chain(goroutine_calls).collect(),
        uses_class,
        class_uses_class,
    }
}
fn apply_go_call_graph_to_ir(accumulator: &mut IrEdgeAccumulator, edges: GoCallGraphEdges) {
for (caller, callee) in edges.calls_function {
accumulator.calls_function.insert((caller, callee));
}
for (fn_fqn, struct_fqn) in edges.uses_class {
accumulator.uses_class.insert((fn_fqn, struct_fqn));
}
for (derived, base) in edges.class_uses_class {
accumulator.class_uses_class.insert((derived, base));
}
}
fn apply_go_call_graph_to_batch(accumulator: &mut BatchAccumulator, edges: GoCallGraphEdges) {
for (caller, callee) in edges.calls_function {
accumulator.add_calls_function(caller, callee);
}
for (fn_fqn, struct_fqn) in edges.uses_class {
accumulator.add_uses_class(fn_fqn, struct_fqn);
}
for (derived, base) in edges.class_uses_class {
accumulator.add_class_uses_class(derived, base);
}
}
/// Persists everything extracted from one parsed Go file into Neo4j.
///
/// In order, this:
/// 1. emits (capped) parse warnings for the file;
/// 2. merges a `Class` node per struct/interface and links it from the `File` node;
/// 3. merges a `Function` node per function/method, linked from the file and, for methods,
///    from the receiver's `Class` node;
/// 4. merges `ApiEndpoint` nodes for detected HTTP routes and links them to handlers whose
///    simple name matches;
/// 5. merges `ExternalApi` nodes for external URLs found in the source and records a
///    call from every function of this file to each of them in `accumulator`;
/// 6. adds call / type-use / embedding edges to `accumulator`;
/// 7. merges `DEPENDS_ON_FILE` edges for imports that resolve to a scanned file, and
///    optionally prints the ones that do not.
///
/// Queries in steps 2–5 and 7 are run immediately against `graph`; the first failing query
/// aborts the function and its error is propagated. `compressor` is passed through to
/// `code_bytes_for_span` for each class and function span.
async fn persist_go_structure(
graph: &Graph,
file_path: &str,
file: &ParsedFile,
source: &str,
known_paths: &HashSet<String>,
project_name: Option<String>,
accumulator: &mut BatchAccumulator,
scan_root: &Path,
go_modules: &[GoModule],
go_replaces: &[GoReplace],
persistence: &GraphPersistenceOptions,
compressor: Option<&CompressorClient>,
) -> Result<(), GraphError> {
let language = file.language.to_string();
let package_name = extract_go_package(source);
// Report syntax problems, limited to the configured number per file.
emit_limited_parse_warnings(
"Go",
file_path,
extract_go_parse_warnings(&file.tree, source),
persistence.max_parse_warnings_per_file,
);
let (structs, functions) = extract_go_symbols(&file.tree, source, package_name.as_deref());
// Span maps are keyed by FQN and used to look up the source range of each symbol.
let class_spans = extract_go_class_spans(&file.tree, source, package_name.as_deref());
let function_spans = extract_go_function_body_spans(&file.tree, source, package_name.as_deref());
// --- Structs and interfaces -> Class nodes ---
for strct in &structs {
let class_kind = strct.kind.map(|k| k.to_string());
let code_bytes = code_bytes_for_span(
compressor,
source,
class_spans.get(&strct.fqn).copied(),
LanguageId::Go,
)
.await;
// On an existing node, `kind` and `code_bytes` are only overwritten by non-null values.
let q = query(
"
MATCH (f:File { path: $path })
MERGE (c:Class { fqn: $class_fqn })
ON CREATE SET c.name = $class_name,
c.path = $path,
c.project_name = $project_name,
c.language = $language,
c.kind = $class_kind,
c.code_bytes = $code_bytes
ON MATCH SET c.name = $class_name,
c.path = $path,
c.project_name = $project_name,
c.language = $language,
c.kind = coalesce($class_kind, c.kind),
c.code_bytes = coalesce($code_bytes, c.code_bytes)
MERGE (f)-[:DECLARES_CLASS]->(c)
",
)
.param("path", file_path.to_string())
.param("class_fqn", strct.fqn.clone())
.param("class_name", strct.name.clone())
.param("project_name", project_name.clone())
.param("language", language.clone())
.param("class_kind", class_kind)
.param(props::CODE_BYTES, code_bytes);
graph.run(q).await?;
}
// --- Functions and methods -> Function nodes ---
for func in &functions {
let code_bytes = code_bytes_for_span(
compressor,
source,
function_spans.get(&func.fqn).copied(),
LanguageId::Go,
)
.await;
match &func.class_fqn {
// Method: also merge the receiver's Class node and link it to the function.
Some(class_fqn) => {
let q = query(
"
MATCH (f:File { path: $path })
MERGE (cls:Class { fqn: $class_fqn })
MERGE (fn:Function { fqn: $fn_fqn })
ON CREATE SET fn.name = $fn_name,
fn.path = $path,
fn.project_name = $project_name,
fn.language = $language,
fn.is_pointer_receiver = $is_pointer_receiver,
fn.code_bytes = $code_bytes
ON MATCH SET fn.name = $fn_name,
fn.path = $path,
fn.project_name = $project_name,
fn.language = $language,
fn.is_pointer_receiver = coalesce($is_pointer_receiver, fn.is_pointer_receiver),
fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
MERGE (f)-[:DECLARES_FUNCTION]->(fn)
MERGE (cls)-[:DECLARES_FUNCTION]->(fn)
",
)
.param("path", file_path.to_string())
.param("class_fqn", class_fqn.clone())
.param("fn_fqn", func.fqn.clone())
.param("fn_name", func.name.clone())
.param("project_name", project_name.clone())
.param("language", language.clone())
.param("is_pointer_receiver", func.is_pointer_receiver)
.param(props::CODE_BYTES, code_bytes.clone());
graph.run(q).await?;
}
// Free function: declared by the file only.
None => {
let q = query(
"
MATCH (f:File { path: $path })
MERGE (fn:Function { fqn: $fn_fqn })
ON CREATE SET fn.name = $fn_name,
fn.path = $path,
fn.project_name = $project_name,
fn.language = $language,
fn.is_pointer_receiver = $is_pointer_receiver,
fn.code_bytes = $code_bytes
ON MATCH SET fn.name = $fn_name,
fn.path = $path,
fn.project_name = $project_name,
fn.language = $language,
fn.is_pointer_receiver = coalesce($is_pointer_receiver, fn.is_pointer_receiver),
fn.code_bytes = coalesce($code_bytes, fn.code_bytes)
MERGE (f)-[:DECLARES_FUNCTION]->(fn)
",
)
.param("path", file_path.to_string())
.param("fn_fqn", func.fqn.clone())
.param("fn_name", func.name.clone())
.param("project_name", project_name.clone())
.param("language", language.clone())
.param("is_pointer_receiver", func.is_pointer_receiver)
.param(props::CODE_BYTES, code_bytes);
graph.run(q).await?;
}
}
}
// --- HTTP routes -> ApiEndpoint nodes ---
let endpoints = extract_go_http_endpoints(source);
for (methods, path_template, handler_name) in endpoints {
let norm_path = normalize_api_path(&path_template);
// `methods` is always overwritten; the other properties keep an existing value.
let api_query = query(
"
MERGE (api:ApiEndpoint { path: $path })
ON CREATE SET api.methods = $methods,
api.protocol = 'http',
api.framework = 'go-http',
api.project_name = $project_name,
api.norm_path = $norm_path
ON MATCH SET api.methods = $methods,
api.protocol = coalesce(api.protocol, 'http'),
api.framework = coalesce(api.framework, 'go-http'),
api.project_name = coalesce(api.project_name, $project_name),
api.norm_path = coalesce(api.norm_path, $norm_path)
",
)
.param("path", path_template.clone())
.param("methods", methods.clone())
.param("project_name", project_name.clone())
.param("norm_path", norm_path.clone());
graph.run(api_query).await?;
// Handlers are matched by simple name, so every function in this file with that name
// (free function or method) is linked to the endpoint.
for func in &functions {
if func.name == handler_name {
let rel_query = query(
"
MERGE (fn:Function { fqn: $fn_fqn })
MERGE (api:ApiEndpoint { path: $path })
MERGE (api)-[:HANDLED_BY]->(fn)
",
)
.param("fn_fqn", func.fqn.clone())
.param("path", path_template.clone());
graph.run(rel_query).await?;
}
}
}
// --- External URLs -> ExternalApi nodes ---
let external_urls = extract_external_http_urls(source);
for full_url in external_urls {
let (protocol_opt, host, path) = split_url_protocol_host_and_path(&full_url);
// Default to plain http when the URL carries no protocol.
let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
let base_url = format!("{protocol}://{host}");
// The host doubles as both the node name and the provider.
let name = host.clone();
let norm_path = normalize_api_path(&path);
let ext_query = query(
"
MERGE (ext:ExternalApi { base_url: $base_url, norm_path: $norm_path })
ON CREATE SET ext.name = $name,
ext.path = $path,
ext.protocol = $protocol,
ext.provider = $provider,
ext.project_name = $project_name
ON MATCH SET ext.name = coalesce(ext.name, $name),
ext.path = coalesce(ext.path, $path),
ext.protocol = coalesce(ext.protocol, $protocol),
ext.provider = coalesce(ext.provider, $provider),
ext.project_name = coalesce(ext.project_name, $project_name)
",
)
.param("name", name.clone())
.param("base_url", base_url.clone())
.param("path", path.clone())
.param("norm_path", norm_path.clone())
.param("protocol", protocol.clone())
.param("provider", name.clone())
.param("project_name", project_name.clone());
graph.run(ext_query).await?;
// NOTE(review): the URL is attributed to every function in the file, not only to the
// function that contains it — confirm this over-approximation is intended.
for func in &functions {
accumulator.add_calls_external_api(
func.fqn.clone(),
base_url.clone(),
norm_path.clone(),
);
}
}
// --- Call graph, type-use and embedding edges (batched, not written here) ---
apply_go_call_graph_to_batch(
accumulator,
collect_go_call_graph_edges(&file.tree, source, package_name.as_deref()),
);
// --- Imports -> DEPENDS_ON_FILE edges ---
for imp in extract_go_imports(&file.tree, source) {
if let Some(dep_path) =
resolve_go_import_to_known_go_file(
&imp,
known_paths,
go_modules,
go_replaces,
Some(scan_root),
)
{
let dep_query = query(
"
MERGE (src:File { path: $src_path })
MERGE (dst:File { path: $dst_path })
MERGE (src)-[:DEPENDS_ON_FILE]->(dst)
",
)
.param("src_path", file_path.to_string())
.param("dst_path", dep_path.clone());
graph.run(dep_query).await?;
} else if should_log_unresolved_import(
persistence.verbose_imports,
is_go_stdlib_import(&imp),
is_likely_third_party_go_import(&imp),
) {
println!("Go import (unresolved to scanned files): `{}` in {}", imp, file_path);
}
}
Ok(())
}
/// Collects `(line, column, snippet)` for every `ERROR` or missing node in the tree.
///
/// Line and column are 1-based. The snippet is at most the first 120 characters of the
/// node's text with newlines replaced by spaces, and is empty for zero-width nodes.
fn extract_go_parse_warnings(tree: &Tree, source: &str) -> Vec<(usize, usize, String)> {
    let mut warnings = Vec::new();
    walk_tree(tree.root_node(), |node| {
        let is_problem = node.is_missing() || node.kind() == "ERROR";
        if !is_problem {
            return;
        }
        let begin = node.start_byte() as usize;
        let finish = (node.end_byte() as usize).min(source.len());
        let mut snippet = String::new();
        if begin < finish {
            snippet = source[begin..finish]
                .chars()
                .take(120)
                .map(|c| if c == '\n' { ' ' } else { c })
                .collect();
        }
        let at = node.start_position();
        warnings.push((at.row + 1, at.column + 1, snippet));
    });
    warnings
}
/// Derives the callee FQN of a Go `call_expression`.
///
/// The trimmed text of the `function` field is used verbatim when it already contains a
/// `.` (selector / qualified call); otherwise it is prefixed with `package` when one is
/// given. Returns `None` if the field is missing or its text is empty.
fn go_go_callee_fqn_from_call_expression(
    call: Node,
    source: &str,
    package: Option<&str>,
) -> Option<String> {
    let target = call.child_by_field_name("function")?;
    let lo = target.start_byte() as usize;
    let hi = (target.end_byte() as usize).min(source.len());
    if lo >= hi {
        return None;
    }
    let text = source[lo..hi].trim();
    if text.is_empty() {
        return None;
    }
    match package {
        Some(pkg) if !text.contains('.') => Some(format!("{pkg}.{text}")),
        _ => Some(text.to_string()),
    }
}
/// Builds the FQN of a Go `function_declaration`: `<package>.<name>`, or just `<name>`
/// when no package is known. Returns `None` if the declaration has no readable name.
fn go_decl_fqn_from_function_declaration(
    decl: Node,
    source: &str,
    package: Option<&str>,
) -> Option<String> {
    let ident = decl.child_by_field_name("name")?;
    let lo = ident.start_byte() as usize;
    let hi = (ident.end_byte() as usize).min(source.len());
    if lo >= hi {
        return None;
    }
    let name = &source[lo..hi];
    let fqn = if let Some(pkg) = package {
        format!("{pkg}.{name}")
    } else {
        name.to_string()
    };
    Some(fqn)
}
/// Builds the FQN of a Go `method_declaration` as `[<package>.]<Receiver>.<method>`.
///
/// The receiver type is the text of the last non-empty `type_identifier` visited while
/// walking the `receiver` field. Returns `None` when the method name or the receiver type
/// cannot be determined.
fn go_decl_fqn_from_method_declaration(
    decl: Node,
    source: &str,
    package: Option<&str>,
) -> Option<String> {
    let ident = decl.child_by_field_name("name")?;
    let lo = ident.start_byte() as usize;
    let hi = (ident.end_byte() as usize).min(source.len());
    if lo >= hi {
        return None;
    }
    let method = &source[lo..hi];

    let receiver = decl.child_by_field_name("receiver")?;
    let mut receiver_type: Option<&str> = None;
    walk_tree(receiver, |n| {
        if n.kind() != "type_identifier" {
            return;
        }
        let a = n.start_byte() as usize;
        let b = (n.end_byte() as usize).min(source.len());
        if a < b {
            // Later matches overwrite earlier ones.
            receiver_type = Some(&source[a..b]);
        }
    });
    let receiver_type = receiver_type?;

    Some(match package {
        Some(pkg) => format!("{pkg}.{receiver_type}.{method}"),
        None => format!("{receiver_type}.{method}"),
    })
}
fn go_enclosing_func_decl_fqn_from_inner(node: Node, source: &str, package: Option<&str>) -> Option<String> {
let mut cur = node.parent();
while let Some(n) = cur {
match n.kind() {
"function_declaration" => return go_decl_fqn_from_function_declaration(n, source, package),
"method_declaration" => return go_decl_fqn_from_method_declaration(n, source, package),
_ => cur = n.parent(),
}
}
None
}
fn extract_go_goroutine_calls(
tree: &Tree,
source: &str,
package: Option<&str>,
) -> Vec<(String, String)> {
let mut out = Vec::new();
let root = tree.root_node();
walk_tree(root, |node| {
if node.kind() != "go_statement" {
return;
}
let Some(caller) = go_enclosing_func_decl_fqn_from_inner(node, source, package) else {
return;
};
let mut i = 0usize;
while let Some(expr) = node.named_child(i) {
i += 1;
if expr.kind() == "call_expression" {
if let Some(callee) = go_go_callee_fqn_from_call_expression(expr, source, package) {
out.push((caller.clone(), callee));
}
break;
}
}
});
out
}
/// Strips any number of `pointer_type` wrappers and returns the node underneath.
///
/// A `pointer_type` with no named child is returned as-is.
fn go_type_leaf_for_embedding(mut n: Node) -> Node {
    while n.kind() == "pointer_type" {
        match n.named_child(0) {
            Some(pointee) => n = pointee,
            None => break,
        }
    }
    n
}
/// Resolves the FQN of an embedded field's type.
///
/// Pointer wrappers are removed first. A name that already contains `.` is kept as
/// written; otherwise it is qualified with `package` when one is given. Returns `None`
/// for an empty type text.
fn go_embedding_type_fqn(type_node: Node, source: &str, package: Option<&str>) -> Option<String> {
    let leaf = go_type_leaf_for_embedding(type_node);
    let lo = leaf.start_byte() as usize;
    let hi = (leaf.end_byte() as usize).min(source.len());
    if lo >= hi {
        return None;
    }
    let text = source[lo..hi].trim();
    if text.is_empty() {
        return None;
    }
    match package {
        Some(pkg) if !text.contains('.') => Some(format!("{pkg}.{text}")),
        _ => Some(text.to_string()),
    }
}
fn extract_go_embedding(
tree: &Tree,
source: &str,
package: Option<&str>,
) -> Vec<(String, String)> {
let mut out = Vec::new();
let root = tree.root_node();
walk_tree(root, |node| {
if node.kind() != "type_declaration" {
return;
}
let mut i = 0usize;
while let Some(child) = node.child(i) {
i += 1;
if child.kind() != "type_spec" {
continue;
}
let Some(type_n) = child.child_by_field_name("type") else {
continue;
};
if type_n.kind() != "struct_type" {
continue;
}
let Some(name_node) = child.child_by_field_name("name") else {
continue;
};
let s = name_node.start_byte() as usize;
let e = (name_node.end_byte() as usize).min(source.len());
if s >= e {
continue;
}
let struct_name = source[s..e].to_string();
let struct_fqn = match package {
Some(pkg) => format!("{pkg}.{struct_name}"),
None => struct_name,
};
walk_tree(type_n, |fd| {
if fd.kind() != "field_declaration" {
return;
}
if fd.child_by_field_name("name").is_some() {
return;
}
let Some(ty) = fd.child_by_field_name("type") else {
return;
};
if let Some(emb) = go_embedding_type_fqn(ty, source, package) {
out.push((struct_fqn.clone(), emb));
}
});
}
});
out
}
/// Returns the import paths of every `import_spec` in the file, with surrounding
/// backticks and double quotes removed. Empty paths are dropped.
fn extract_go_imports(tree: &Tree, source: &str) -> Vec<String> {
    let mut imports = Vec::new();
    walk_tree(tree.root_node(), |spec| {
        if spec.kind() != "import_spec" {
            return;
        }
        let Some(literal) = spec.child_by_field_name("path") else {
            return;
        };
        let lo = literal.start_byte() as usize;
        let hi = (literal.end_byte() as usize).min(source.len());
        if lo >= hi {
            return;
        }
        // Strip raw-string backticks first, then interpreted-string quotes.
        let unquoted = source[lo..hi]
            .trim()
            .trim_matches('`')
            .trim_matches('"');
        if !unquoted.is_empty() {
            imports.push(unquoted.to_string());
        }
    });
    imports
}
/// Returns the package name from the first `package <name>` clause in `source`.
///
/// Detection is line based: the first line that, after trimming, starts with `package `
/// and is followed by an identifier wins. Only the identifier itself is returned, so a
/// trailing comment or semicolon on the clause (`package main // entry point`) does not
/// leak into the name and, through it, into every FQN derived from it.
///
/// Returns `None` when no such line exists.
fn extract_go_package(source: &str) -> Option<String> {
    source.lines().find_map(|line| {
        let rest = line.trim().strip_prefix("package ")?;
        // Keep only the leading identifier characters (letters, digits, underscore).
        let name: String = rest
            .trim_start()
            .chars()
            .take_while(|c| c.is_alphanumeric() || *c == '_')
            .collect();
        (!name.is_empty()).then_some(name)
    })
}
fn extract_go_symbols(
tree: &Tree,
source: &str,
package: Option<&str>,
) -> (Vec<ClassSymbol>, Vec<FunctionSymbol>) {
let mut structs: Vec<ClassSymbol> = Vec::new();
let mut functions: Vec<FunctionSymbol> = Vec::new();
let mut seen_struct_names: HashSet<String> = HashSet::new();
let root = tree.root_node();
walk_tree(root, |node| {
let kind = node.kind();
match kind {
"type_declaration" => {
let mut i = 0usize;
while let Some(child) = node.child(i) {
i += 1;
if child.kind() != "type_spec" {
continue;
}
let Some(type_n) = child.child_by_field_name("type") else {
continue;
};
let kind_label: Option<&'static str> = if type_n.kind() == "struct_type" {
Some("struct")
} else if type_n.kind() == "interface_type" {
Some("interface")
} else {
continue;
};
let Some(name_node) = child.child_by_field_name("name") else {
continue;
};
let start = name_node.start_byte() as usize;
let end = (name_node.end_byte() as usize).min(source.len());
if start >= end {
continue;
}
let name = source[start..end].to_string();
let fqn = if let Some(pkg) = package {
format!("{pkg}.{name}")
} else {
name.clone()
};
if seen_struct_names.insert(fqn.clone()) {
structs.push(ClassSymbol {
name,
fqn,
kind: kind_label,
});
}
}
}
"function_declaration" => {
if let Some(name_node) = node.child_by_field_name("name") {
let start = name_node.start_byte() as usize;
let end = (name_node.end_byte() as usize).min(source.len());
if end <= source.len() && start < end {
let name = source[start..end].to_string();
let fqn = if let Some(pkg) = package {
format!("{pkg}.{name}")
} else {
name.clone()
};
functions.push(FunctionSymbol {
name,
fqn,
class_fqn: None,
return_type: None,
param_types: Vec::new(),
param_count: 0,
modifiers: Vec::new(),
is_pointer_receiver: None,
});
}
}
}
"method_declaration" => {
if let Some(name_node) = node.child_by_field_name("name") {
let start = name_node.start_byte() as usize;
let end = (name_node.end_byte() as usize).min(source.len());
if end <= source.len() && start < end {
let method_name = source[start..end].to_string();
let is_pointer_receiver = node
.child_by_field_name("receiver")
.map(|r| go_receiver_has_pointer(r))
.unwrap_or(false);
let receiver_type = node
.child_by_field_name("receiver")
.and_then(|r| {
let mut type_name = None;
walk_tree(r, |n| {
if n.kind() == "type_identifier" {
let s = n.start_byte() as usize;
let e = (n.end_byte() as usize).min(source.len());
if e <= source.len() && s < e {
type_name = Some(source[s..e].to_string());
}
}
});
type_name
});
let class_fqn = receiver_type.map(|t| {
if let Some(pkg) = package {
format!("{pkg}.{t}")
} else {
t
}
});
let fqn = if let Some(ref cls) = class_fqn {
format!("{cls}.{method_name}")
} else if let Some(pkg) = package {
format!("{pkg}.{method_name}")
} else {
method_name.clone()
};
functions.push(FunctionSymbol {
name: method_name,
fqn,
class_fqn,
return_type: None,
param_types: Vec::new(),
param_count: 0,
modifiers: Vec::new(),
is_pointer_receiver: Some(is_pointer_receiver),
});
}
}
}
_ => {}
}
});
(structs, functions)
}
/// Reports whether any node in the receiver subtree is a `pointer_type`.
fn go_receiver_has_pointer(receiver: Node) -> bool {
    let mut found = false;
    walk_tree(receiver, |n| {
        found = found || n.kind() == "pointer_type";
    });
    found
}
fn extract_go_http_endpoints(source: &str) -> Vec<(Vec<String>, String, String)> {
let mut endpoints: Vec<(Vec<String>, String, String)> = Vec::new();
for line in source.lines() {
let trimmed = line.trim();
if trimmed.contains("HandleFunc(") || trimmed.contains("Handle(") {
if let Some((path, handler)) = extract_go_handler_call(trimmed) {
endpoints.push((vec!["ANY".to_string()], path, handler));
}
}
for (pattern, method) in [
(".Get(", "GET"),
(".Post(", "POST"),
(".Put(", "PUT"),
(".Delete(", "DELETE"),
(".Patch(", "PATCH"),
] {
if trimmed.contains(pattern) {
if let Some((path, handler)) = extract_go_handler_call(trimmed) {
endpoints.push((vec![method.to_string()], path, handler));
}
}
}
for (pattern, method) in [
(".GET(", "GET"),
(".POST(", "POST"),
(".PUT(", "PUT"),
(".DELETE(", "DELETE"),
(".PATCH(", "PATCH"),
] {
if trimmed.contains(pattern) {
if let Some((path, handler)) = extract_go_handler_call(trimmed) {
endpoints.push((vec![method.to_string()], path, handler));
}
}
}
}
endpoints
}
/// Pulls `(path, handler)` out of a route-registration line.
///
/// The path is the first double-quoted literal. The handler is the text after the first
/// comma following that literal, cut at the next `(`, `)` or `,` and trimmed. Returns
/// `None` if any piece is missing or empty.
fn extract_go_handler_call(line: &str) -> Option<(String, String)> {
    let (_, after_open_quote) = line.split_once('"')?;
    let (path, after_path) = after_open_quote.split_once('"')?;
    let (_, handler_part) = after_path.split_once(',')?;
    let cut = handler_part
        .find(|c: char| matches!(c, '(' | ')' | ','))
        .unwrap_or(handler_part.len());
    let handler = handler_part[..cut].trim();
    if path.is_empty() || handler.is_empty() {
        None
    } else {
        Some((path.to_string(), handler.to_string()))
    }
}
fn extract_go_used_types(
tree: &Tree,
source: &str,
package: Option<&str>,
) -> Vec<(String, String)> {
let mut uses: Vec<(String, String)> = Vec::new();
let root = tree.root_node();
walk_tree(root, |node| {
let kind = node.kind();
if kind != "function_declaration" && kind != "method_declaration" {
return;
}
let func_name = node
.child_by_field_name("name")
.and_then(|n| {
let s = n.start_byte() as usize;
let e = n.end_byte() as usize;
if e <= source.len() && s < e {
Some(source[s..e].to_string())
} else {
None
}
});
let func_name = match func_name {
Some(n) => n,
None => return,
};
let func_fqn = if let Some(pkg) = package {
format!("{pkg}.{func_name}")
} else {
func_name
};
walk_tree(node, |child| {
if child.kind() == "type_identifier" {
let s = child.start_byte() as usize;
let e = child.end_byte() as usize;
if e <= source.len() && s < e {
let type_name = source[s..e].to_string();
if !is_go_builtin_type(&type_name) {
let type_fqn = if let Some(pkg) = package {
format!("{pkg}.{type_name}")
} else {
type_name
};
uses.push((func_fqn.clone(), type_fqn));
}
}
}
});
});
uses
}
/// Reports whether `name` is one of Go's predeclared types.
///
/// Covers the numeric, boolean and string types, `byte`, `rune`, `error`, and the
/// predeclared `any` and `comparable`, so that none of them is reported as a
/// package-local type.
fn is_go_builtin_type(name: &str) -> bool {
    matches!(
        name,
        "int" | "int8" | "int16" | "int32" | "int64"
            | "uint" | "uint8" | "uint16" | "uint32" | "uint64" | "uintptr"
            | "float32" | "float64" | "complex64" | "complex128"
            | "bool" | "string" | "byte" | "rune" | "error"
            | "any" | "comparable"
    )
}
fn extract_go_calls(
tree: &Tree,
source: &str,
package: Option<&str>,
) -> Vec<(String, String)> {
let mut calls: Vec<(String, String)> = Vec::new();
let root = tree.root_node();
walk_tree(root, |node| {
let kind = node.kind();
if kind != "function_declaration" && kind != "method_declaration" {
return;
}
let func_name = node
.child_by_field_name("name")
.and_then(|n| {
let s = n.start_byte() as usize;
let e = n.end_byte() as usize;
if e <= source.len() && s < e {
Some(source[s..e].to_string())
} else {
None
}
});
let func_name = match func_name {
Some(n) => n,
None => return,
};
let caller_fqn = if let Some(pkg) = package {
format!("{pkg}.{func_name}")
} else {
func_name
};
walk_tree(node, |child| {
if child.kind() == "call_expression" {
if let Some(func_node) = child.child_by_field_name("function") {
let s = func_node.start_byte() as usize;
let e = func_node.end_byte() as usize;
if e <= source.len() && s < e {
let callee_name = source[s..e].to_string();
let callee_fqn = if callee_name.contains('.') {
callee_name
} else if let Some(pkg) = package {
format!("{pkg}.{callee_name}")
} else {
callee_name
};
calls.push((caller_fqn.clone(), callee_fqn));
}
}
}
});
});
calls
}
/// Options for IR extraction. The fields mirror those of `GraphPersistenceOptions`.
#[derive(Debug, Clone)]
pub struct ExtractOptions {
/// Verbose import reporting flag (defaults to `false`).
pub verbose_imports: bool,
/// Per-file cap on reported parse warnings (defaults to `50`).
pub max_parse_warnings_per_file: usize,
/// Configuration for the source compressor.
pub compressor: CompressorConfig,
}
impl Default for ExtractOptions {
fn default() -> Self {
Self {
verbose_imports: false,
max_parse_warnings_per_file: 50,
compressor: CompressorConfig::default(),
}
}
}
/// Returns the function-body spans of `file`, keyed by function FQN, by dispatching to
/// the extractor for the file's language.
///
/// Java, C#, Go and Erlang have dedicated extractors (scoped by package, namespace,
/// package and module name respectively); every other language goes through the generic
/// file-scoped extractor, the only one that uses `file_path`. An Erlang file whose module
/// name cannot be resolved yields an empty map.
pub fn function_body_spans_for_file(
    file: &ParsedFile,
    file_path: &str,
    source: &str,
) -> HashMap<String, (usize, usize)> {
    let tree = &file.tree;
    match file.language {
        LanguageId::Java => {
            extract_java_method_body_spans(tree, source, extract_java_package(source).as_deref())
        }
        LanguageId::CSharp => extract_csharp_method_body_spans_map(
            tree,
            source,
            extract_csharp_namespace(tree, source).as_deref(),
        ),
        LanguageId::Go => {
            extract_go_function_body_spans(tree, source, extract_go_package(source).as_deref())
        }
        LanguageId::Erlang => match resolve_erlang_module_name(&file.path, tree, source) {
            Some(module) => extract_erlang_function_spans(&module, tree, source),
            None => HashMap::new(),
        },
        _ => extract_non_java_function_body_spans(file, source, file_path),
    }
}
/// Returns the class-body spans of `file`, keyed by class FQN.
///
/// Only Go and C# are supported; every other language yields an empty map.
pub fn class_body_spans_for_file(
    file: &ParsedFile,
    source: &str,
) -> HashMap<String, (usize, usize)> {
    let tree = &file.tree;
    match file.language {
        LanguageId::CSharp => extract_csharp_class_spans(tree, source),
        LanguageId::Go => {
            extract_go_class_spans(tree, source, extract_go_package(source).as_deref())
        }
        _ => HashMap::new(),
    }
}
/// Returns the property spans of `file`, keyed by property FQN.
///
/// Only C# is supported; every other language yields an empty map.
pub fn property_body_spans_for_file(
    file: &ParsedFile,
    source: &str,
) -> HashMap<String, (usize, usize)> {
    if let LanguageId::CSharp = file.language {
        extract_csharp_property_spans(&file.tree, source)
    } else {
        HashMap::new()
    }
}
/// Fills in `code_bytes` on the functions, modules and classes of `ir` using the
/// compressor service.
///
/// Does nothing when `config.enabled` is false. If the client cannot be created the step
/// is skipped with a message on stderr; a failed health check is only reported and
/// processing continues. IR items whose `path` does not match any of `files` (as rendered
/// by `neo4j_path_string` relative to `root`), or for which no span is known, are left
/// untouched.
///
/// Span maps are computed once per file and reused for all of that file's functions and
/// classes, rather than re-walking the syntax tree for every item.
///
/// # Errors
///
/// The signature allows a `CompressError`, but every path currently returns `Ok(())`.
pub async fn enrich_project_ir_code_bytes(
    ir: &mut ProjectIr,
    root: &Path,
    files: &[ParsedFile],
    config: &CompressorConfig,
) -> Result<(), crate::compress::CompressError> {
    if !config.enabled {
        return Ok(());
    }
    let client = match CompressorClient::from_config(config) {
        Ok(c) => c,
        Err(e) => {
            eprintln!("RedCompressor: failed to create client ({e}); skipping code_bytes");
            return Ok(());
        }
    };
    if let Err(e) = client.health_check().await {
        eprintln!("RedCompressor: health check failed ({e}); compression may be unavailable");
    }

    // When two files render to the same path, the later one wins.
    let by_path: HashMap<String, &ParsedFile> = files
        .iter()
        .map(|file| (neo4j_path_string(root, &file.path), file))
        .collect();

    // Per-file caches of FQN -> span, filled lazily on first use of each path.
    let mut function_spans: HashMap<String, HashMap<String, (usize, usize)>> = HashMap::new();
    let mut class_spans: HashMap<String, HashMap<String, (usize, usize)>> = HashMap::new();

    for func in &mut ir.functions {
        let Some(file) = by_path.get(&func.path) else {
            continue;
        };
        let spans = function_spans
            .entry(func.path.clone())
            .or_insert_with(|| function_body_spans_for_file(file, &func.path, &file.source));
        let Some(span) = spans.get(&func.fqn).copied() else {
            continue;
        };
        func.code_bytes = code_bytes_for_span(
            Some(&client),
            &file.source,
            Some(span),
            file.language,
        )
        .await;
    }

    for module in &mut ir.modules {
        let Some(file) = by_path.get(&module.path) else {
            continue;
        };
        // NOTE(review): the language is fixed to Erlang here rather than taken from
        // `file.language` — presumably only Erlang files produce modules; confirm.
        module.code_bytes =
            compress_full_source(&file.source, LanguageId::Erlang, &client).await;
    }

    for class in &mut ir.classes {
        let Some(file) = by_path.get(&class.path) else {
            continue;
        };
        let spans = class_spans
            .entry(class.path.clone())
            .or_insert_with(|| class_body_spans_for_file(file, &file.source));
        let Some(span) = spans.get(&class.fqn).copied() else {
            continue;
        };
        class.code_bytes = code_bytes_for_span(
            Some(&client),
            &file.source,
            Some(span),
            file.language,
        )
        .await;
    }
    Ok(())
}
/// Deduplicating buffer for edges gathered while building the project IR.
/// Each set holds `(from_fqn, to_fqn)` pairs; `flush_into` converts them to `EdgeIr`.
#[derive(Debug, Default)]
struct IrEdgeAccumulator {
/// Function -> Function call pairs `(caller, callee)`.
calls_function: HashSet<(String, String)>,
/// Function -> Class usage pairs `(function, class)`.
uses_class: HashSet<(String, String)>,
/// Class -> Class pairs `(derived, base)`.
class_uses_class: HashSet<(String, String)>,
}
impl IrEdgeAccumulator {
    /// Appends one `EdgeIr` per accumulated pair to `edges`, leaving the accumulator
    /// untouched.
    ///
    /// Groups are emitted in the order calls → function-uses-class → class-uses-class;
    /// within a group the order is the set's iteration order.
    fn flush_into(&self, edges: &mut Vec<EdgeIr>) {
        edges.extend(self.calls_function.iter().map(|(caller, callee)| EdgeIr {
            kind: EdgeKind::CallsFunction,
            from_label: "Function".into(),
            from_key: caller.clone(),
            to_label: "Function".into(),
            to_key: callee.clone(),
        }));
        edges.extend(self.uses_class.iter().map(|(fn_fqn, cls_fqn)| EdgeIr {
            kind: EdgeKind::UsesClass,
            from_label: "Function".into(),
            from_key: fn_fqn.clone(),
            to_label: "Class".into(),
            to_key: cls_fqn.clone(),
        }));
        edges.extend(self.class_uses_class.iter().map(|(derived, base)| EdgeIr {
            kind: EdgeKind::ClassUsesClass,
            from_label: "Class".into(),
            from_key: derived.clone(),
            to_label: "Class".into(),
            to_key: base.clone(),
        }));
    }
}
/// Appends a `DependsOnFile` edge from file `src` to file `dst`.
fn push_depends_on_file(edges: &mut Vec<EdgeIr>, src: &str, dst: &str) {
    let edge = EdgeIr {
        kind: EdgeKind::DependsOnFile,
        from_label: "File".into(),
        from_key: src.to_owned(),
        to_label: "File".into(),
        to_key: dst.to_owned(),
    };
    edges.push(edge);
}
/// Appends a `DeclaresFunction` edge pointing at the function `fqn`.
///
/// The declaring side is described by `from_label` (the callers in this file
/// pass `"File"`, `"Class"` or `"Module"`) and its key `from_key`.
fn push_declares_function(edges: &mut Vec<EdgeIr>, from_label: &str, from_key: &str, fqn: &str) {
    let declaration = EdgeIr {
        kind: EdgeKind::DeclaresFunction,
        from_label: from_label.into(),
        from_key: from_key.to_owned(),
        to_label: "Function".into(),
        to_key: fqn.to_owned(),
    };
    edges.push(declaration);
}
/// Builds the in-memory [`ProjectIr`] for a batch of parsed files.
///
/// Every file contributes a `FileIr`; the rest depends on its language:
///
/// * Java: classes, methods, inheritance / injected-dependency edges, calls,
///   file dependencies from internal imports, and Spring endpoints.
/// * C#: structural IR, methods, namespace-import file dependencies,
///   inheritance, used classes and calls.
/// * Erlang: delegated to `extract_erlang_to_ir`.
/// * Go: structs (as classes), functions, import-based file dependencies and
///   the call graph.
/// * anything else: delegated to `extract_non_java_to_ir`.
///
/// Call / uses / class-uses pairs are de-duplicated through an
/// `IrEdgeAccumulator` and flushed once at the end, after which
/// `apply_same_api_edges` links endpoints to matching external APIs.
///
/// `root` is used to derive stored paths and project names. `_options` is
/// forwarded to the generic extractor, which reads `verbose_imports` from it.
pub fn build_project_ir(
    root: &Path,
    files: &[ParsedFile],
    _options: &ExtractOptions,
) -> ProjectIr {
    use crate::ir::{ApiEndpointIr, FileIr};

    let mut ir = ProjectIr::empty();
    let mut accumulator = IrEdgeAccumulator::default();
    // Paths of all scanned files, in the same form used as `File` keys.
    let known_paths: HashSet<String> = files
        .iter()
        .map(|f| neo4j_path_string(root, &f.path))
        .collect();
    // Go module discovery is best effort: an error yields the default value.
    let go_modules = discover_go_modules(root, false).unwrap_or_default();
    let go_replaces = discover_go_replaces(root, false).unwrap_or_default();
    let csharp_batch_index = build_csharp_batch_index(files, root);
    let erlang_module_index = build_erlang_module_index(files);

    for file in files {
        let path = neo4j_path_string(root, &file.path);
        let language = file.language.to_string();
        let project_name = derive_project_name(&file.path, root);
        let source = &file.source;
        ir.files.push(FileIr {
            path: path.clone(),
            language: language.clone(),
            framework: None,
            project_name: project_name.clone(),
        });

        match file.language {
            LanguageId::Java => {
                append_java_class_ir(&mut ir, &path, project_name.clone(), &file.tree, source);
                let package = extract_java_package(source);
                let (_, methods) =
                    extract_java_symbols(&file.tree, source, package.as_deref());
                for func in &methods {
                    ir.functions.push(FunctionIr {
                        name: func.name.clone(),
                        fqn: func.fqn.clone(),
                        path: path.clone(),
                        language: language.clone(),
                        framework: None,
                        project_name: project_name.clone(),
                        arity: None,
                        return_type: func.return_type.clone(),
                        param_count: Some(func.param_count as u32),
                        param_types: func.param_types.clone(),
                        code_bytes: None,
                    });
                    if let Some(class_fqn) = &func.class_fqn {
                        push_declares_function(&mut ir.edges, "Class", class_fqn, &func.fqn);
                    }
                    push_declares_function(&mut ir.edges, "File", &path, &func.fqn);
                }
                for (derived, base) in
                    extract_java_inheritance_edges(&file.tree, source, package.as_deref())
                {
                    accumulator.class_uses_class.insert((derived, base));
                }
                for (cls, dep) in
                    extract_java_injected_dependencies(&file.tree, source, package.as_deref())
                {
                    accumulator.class_uses_class.insert((cls, dep));
                }
                for (caller, callee) in extract_java_calls(&file.tree, source, package.as_deref()) {
                    accumulator.calls_function.insert((caller, callee));
                }
                for import_fqn in extract_internal_java_imports(source) {
                    if let Some(dep_path) = map_import_to_project_path(&path, &import_fqn) {
                        // Only link to files that are part of this scan.
                        if known_paths.contains(&dep_path) {
                            push_depends_on_file(&mut ir.edges, &path, &dep_path);
                        }
                    }
                }
                for (http_methods, path_template, handler_name) in
                    extract_java_spring_endpoints(source)
                {
                    ir.api_endpoints.push(ApiEndpointIr {
                        methods: http_methods.clone(),
                        path: path_template.clone(),
                        protocol: Some("http".into()),
                        framework: Some("spring".into()),
                        project_name: project_name.clone(),
                    });
                    // The endpoint was found in this file, so a method declared
                    // here is the right handler. Searching the whole IR first
                    // could pick a same-named method from a file processed
                    // earlier; the project-wide lookup is kept as a fallback.
                    let handler_fqn = methods
                        .iter()
                        .find(|m| m.name == handler_name)
                        .map(|m| m.fqn.clone())
                        .or_else(|| {
                            resolve_java_handler_fqn(&http_methods, &handler_name, &ir)
                        });
                    if let Some(handler_fqn) = handler_fqn {
                        ir.edges.push(EdgeIr {
                            kind: EdgeKind::HandlesApi,
                            from_label: "ApiEndpoint".into(),
                            from_key: api_endpoint_key(&http_methods, &path_template),
                            to_label: "Function".into(),
                            to_key: handler_fqn,
                        });
                    }
                }
            }
            LanguageId::CSharp => {
                append_csharp_structural_ir(
                    &mut ir,
                    &path,
                    project_name.clone(),
                    &file.tree,
                    source,
                );
                let namespace = extract_csharp_namespace(&file.tree, source);
                let using_summary = extract_csharp_using_summary(&file.tree, source);
                // Only the method symbols are needed in this arm.
                let (_, methods, _) = extract_csharp_symbols(&file.tree, source);
                for func in &methods {
                    ir.functions.push(FunctionIr {
                        name: func.name.clone(),
                        fqn: func.fqn.clone(),
                        path: path.clone(),
                        language: language.clone(),
                        framework: None,
                        project_name: project_name.clone(),
                        arity: None,
                        return_type: func.return_type.clone(),
                        param_count: Some(func.param_count as u32),
                        param_types: func.param_types.clone(),
                        code_bytes: None,
                    });
                    if let Some(class_fqn) = &func.class_fqn {
                        push_declares_function(&mut ir.edges, "Class", class_fqn, &func.fqn);
                    }
                    push_declares_function(&mut ir.edges, "File", &path, &func.fqn);
                }
                for ns in &using_summary.namespace_imports {
                    let Some(dep_paths) = csharp_batch_index.namespace_to_paths.get(ns) else {
                        continue;
                    };
                    for dep_path in dep_paths {
                        // Skip self-dependencies and files outside this scan.
                        if dep_path == &path || !known_paths.contains(dep_path) {
                            continue;
                        }
                        push_depends_on_file(&mut ir.edges, &path, dep_path);
                    }
                }
                for (derived, base) in extract_csharp_class_inheritance_edges(
                    &file.tree,
                    source,
                    namespace.as_deref(),
                    &using_summary,
                    &csharp_batch_index,
                ) {
                    accumulator.class_uses_class.insert((derived, base));
                }
                for (fn_fqn, class_fqn) in extract_csharp_used_classes(
                    &file.tree,
                    source,
                    namespace.as_deref(),
                    &using_summary,
                    &csharp_batch_index,
                ) {
                    accumulator.uses_class.insert((fn_fqn, class_fqn));
                }
                for (caller, callee) in extract_csharp_calls(
                    &file.tree,
                    source,
                    namespace.as_deref(),
                    &using_summary,
                    &csharp_batch_index,
                ) {
                    accumulator.calls_function.insert((caller, callee));
                }
            }
            LanguageId::Erlang => {
                extract_erlang_to_ir(
                    &mut ir,
                    &mut accumulator,
                    file,
                    &path,
                    source,
                    project_name.clone(),
                    &language,
                    &erlang_module_index,
                );
            }
            LanguageId::Go => {
                let package_name = extract_go_package(source);
                let (structs, functions) =
                    extract_go_symbols(&file.tree, source, package_name.as_deref());
                for s in &structs {
                    ir.classes.push(ClassIr {
                        fqn: s.fqn.clone(),
                        name: s.name.clone(),
                        path: path.clone(),
                        language: language.clone(),
                        project_name: project_name.clone(),
                        kind: s.kind.map(str::to_string),
                        code_bytes: None,
                    });
                    ir.edges.push(EdgeIr {
                        kind: EdgeKind::DeclaresClass,
                        from_label: "File".into(),
                        from_key: path.clone(),
                        to_label: "Class".into(),
                        to_key: s.fqn.clone(),
                    });
                }
                for func in &functions {
                    ir.functions.push(FunctionIr {
                        name: func.name.clone(),
                        fqn: func.fqn.clone(),
                        path: path.clone(),
                        language: language.clone(),
                        framework: None,
                        project_name: project_name.clone(),
                        arity: None,
                        return_type: func.return_type.clone(),
                        param_count: Some(func.param_count as u32),
                        param_types: func.param_types.clone(),
                        code_bytes: None,
                    });
                    if let Some(class_fqn) = &func.class_fqn {
                        push_declares_function(&mut ir.edges, "Class", class_fqn, &func.fqn);
                    }
                    push_declares_function(&mut ir.edges, "File", &path, &func.fqn);
                }
                for import_path in extract_go_imports(&file.tree, source) {
                    if let Some(dep) = resolve_go_import_to_known_go_file(
                        &import_path,
                        &known_paths,
                        &go_modules,
                        &go_replaces,
                        Some(root),
                    ) {
                        push_depends_on_file(&mut ir.edges, &path, &dep);
                    }
                }
                apply_go_call_graph_to_ir(
                    &mut accumulator,
                    collect_go_call_graph_edges(&file.tree, source, package_name.as_deref()),
                );
            }
            _ => {
                extract_non_java_to_ir(
                    &mut ir,
                    &mut accumulator,
                    file,
                    &path,
                    source,
                    project_name.clone(),
                    &language,
                    &known_paths,
                    _options,
                );
            }
        }
    }

    accumulator.flush_into(&mut ir.edges);
    apply_same_api_edges(&mut ir);
    ir
}
/// Looks up the fully qualified name of a handler by its simple name.
///
/// Scans `ir.functions` in insertion order and returns the `fqn` of the first
/// function whose `name` equals `handler_name`, or `None` when there is no
/// such function. `_methods` is accepted but not consulted.
fn resolve_java_handler_fqn(
    _methods: &[String],
    handler_name: &str,
    ir: &ProjectIr,
) -> Option<String> {
    for candidate in &ir.functions {
        if candidate.name == handler_name {
            return Some(candidate.fqn.clone());
        }
    }
    None
}
fn extract_erlang_to_ir(
ir: &mut ProjectIr,
accumulator: &mut IrEdgeAccumulator,
file: &ParsedFile,
file_path: &str,
source: &str,
project_name: Option<String>,
language: &str,
erlang_module_index: &HashMap<String, ErlangModuleSnapshot>,
) {
use crate::ir::{ApiEndpointIr, BehaviourIr, CallbackIr, ExternalApiIr, ModuleIr};
let module_name = resolve_erlang_module_name(&file.path, &file.tree, source);
let erlang_meta = extract_erlang_behaviour_metadata_from_tree(&file.tree, source);
if let Some(module_name) = module_name.as_ref() {
ir.modules.push(ModuleIr {
name: module_name.clone(),
path: file_path.to_string(),
language: language.to_string(),
framework: None,
project_name: project_name.clone(),
code_bytes: None,
});
ir.edges.push(EdgeIr {
kind: EdgeKind::DeclaresModule,
from_label: "File".into(),
from_key: file_path.to_string(),
to_label: "Module".into(),
to_key: module_key(module_name, file_path),
});
}
let functions = if let Some(module_name) = module_name.as_ref() {
extract_erlang_functions(module_name, &file.tree, source)
} else {
Vec::new()
};
for (fun_name, arity, fqn) in &functions {
ir.functions.push(FunctionIr {
name: fun_name.clone(),
fqn: fqn.clone(),
path: file_path.to_string(),
language: language.to_string(),
framework: None,
project_name: project_name.clone(),
arity: Some(*arity),
return_type: None,
param_count: None,
param_types: vec![],
code_bytes: None,
});
push_declares_function(&mut ir.edges, "File", file_path, fqn);
if let Some(module_name) = module_name.as_ref() {
push_declares_function(&mut ir.edges, "Module", &module_key(module_name, file_path), fqn);
}
}
let callback_contracts = collect_callback_contracts_for_module(
module_name.as_deref(),
&erlang_meta.behaviour_usages,
&erlang_meta.declared_callbacks,
&erlang_meta.optional_callbacks,
);
if let Some(module_name) = module_name.as_ref() {
for behaviour in &erlang_meta.behaviour_usages {
ir.behaviours.push(BehaviourIr {
name: behaviour.clone(),
path: None,
language: Some(language.to_string()),
project_name: project_name.clone(),
});
ir.edges.push(EdgeIr {
kind: EdgeKind::ImplementsBehaviour,
from_label: "Module".into(),
from_key: module_key(module_name, file_path),
to_label: "Behaviour".into(),
to_key: behaviour.clone(),
});
let dep_path = guess_erlang_file_path_from_module(file_path, behaviour);
push_depends_on_file(&mut ir.edges, file_path, &dep_path);
}
if !erlang_meta.declared_callbacks.is_empty() {
ir.behaviours.push(BehaviourIr {
name: module_name.clone(),
path: Some(file_path.to_string()),
language: Some(language.to_string()),
project_name: project_name.clone(),
});
ir.edges.push(EdgeIr {
kind: EdgeKind::DeclaresBehaviour,
from_label: "File".into(),
from_key: file_path.to_string(),
to_label: "Behaviour".into(),
to_key: module_name.clone(),
});
}
for parent in &erlang_meta.behaviour_extensions {
ir.edges.push(EdgeIr {
kind: EdgeKind::ExtendsBehaviour,
from_label: "Behaviour".into(),
from_key: module_name.clone(),
to_label: "Behaviour".into(),
to_key: parent.clone(),
});
}
}
for contract in &callback_contracts {
let callback_fqn = format!(
"{}:{}/{}",
contract.behaviour, contract.name, contract.arity
);
ir.callbacks.push(CallbackIr {
name: contract.name.clone(),
fqn: callback_fqn.clone(),
arity: contract.arity,
optional: contract.optional,
language: Some(language.to_string()),
project_name: project_name.clone(),
});
ir.edges.push(EdgeIr {
kind: EdgeKind::DeclaresCallback,
from_label: "Behaviour".into(),
from_key: contract.behaviour.clone(),
to_label: "Callback".into(),
to_key: callback_fqn.clone(),
});
}
let function_by_sig: HashMap<(String, u32), String> = functions
.iter()
.map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
.collect();
for contract in &callback_contracts {
let key = (contract.name.clone(), contract.arity);
let Some(fn_fqn) = function_by_sig.get(&key) else {
continue;
};
let callback_fqn = format!(
"{}:{}/{}",
contract.behaviour, contract.name, contract.arity
);
ir.edges.push(EdgeIr {
kind: EdgeKind::ImplementsCallback,
from_label: "Function".into(),
from_key: fn_fqn.clone(),
to_label: "Callback".into(),
to_key: callback_fqn.clone(),
});
}
for (name, arity) in &erlang_meta.overridden_callbacks {
let key = (name.clone(), *arity);
let Some(fn_fqn) = function_by_sig.get(&key) else {
continue;
};
for contract in callback_contracts
.iter()
.filter(|c| c.name == *name && c.arity == *arity)
{
let callback_fqn = format!(
"{}:{}/{}",
contract.behaviour, contract.name, contract.arity
);
ir.edges.push(EdgeIr {
kind: EdgeKind::OverridesCallback,
from_label: "Function".into(),
from_key: fn_fqn.clone(),
to_label: "Callback".into(),
to_key: callback_fqn,
});
}
}
for (methods, path_template, handler_module) in extract_erlang_api_endpoints(&file.tree, source)
{
let methods_owned: Vec<String> = methods.iter().map(|s| s.to_string()).collect();
ir.api_endpoints.push(ApiEndpointIr {
methods: methods_owned.clone(),
path: path_template.clone(),
protocol: Some("http".into()),
framework: Some("cowboy".into()),
project_name: project_name.clone(),
});
for fqn in select_endpoint_handler_fqns(&handler_module, erlang_module_index) {
ir.edges.push(EdgeIr {
kind: EdgeKind::HandlesApi,
from_label: "ApiEndpoint".into(),
from_key: api_endpoint_key(&methods_owned, &path_template),
to_label: "Function".into(),
to_key: fqn,
});
}
}
for full_url in extract_external_http_urls_from_tree(&file.tree, source) {
let (protocol_opt, host, url_path) = split_url_protocol_host_and_path(&full_url);
let protocol = protocol_opt.unwrap_or_else(|| "http".to_string());
let base_url = format!("{protocol}://{host}");
let norm_path = normalize_api_path(&url_path);
ir.external_apis.push(ExternalApiIr {
name: host.clone(),
base_url: Some(base_url.clone()),
protocol: Some(protocol),
provider: Some(host),
service: None,
norm_path: Some(norm_path.clone()),
});
for (_, _, fqn) in &functions {
ir.edges.push(EdgeIr {
kind: EdgeKind::CallsExternalApi,
from_label: "Function".into(),
from_key: fqn.clone(),
to_label: "ExternalApi".into(),
to_key: external_api_key(&base_url, &norm_path),
});
}
}
if let Some(module_name) = module_name.as_ref() {
let function_by_sig: HashMap<(String, u32), String> = functions
.iter()
.map(|(name, arity, fqn)| ((name.clone(), *arity), fqn.clone()))
.collect();
for (caller, callee) in extract_erlang_call_edges(
&file.tree,
source,
Some(module_name.as_str()),
&function_by_sig,
) {
accumulator.calls_function.insert((caller, callee));
}
}
}
fn extract_non_java_to_ir(
ir: &mut ProjectIr,
accumulator: &mut IrEdgeAccumulator,
file: &ParsedFile,
file_path: &str,
source: &str,
project_name: Option<String>,
language: &str,
known_paths: &HashSet<String>,
options: &ExtractOptions,
) {
let functions = extract_non_java_function_symbols(file, source, file_path);
let mut name_to_fqn_depth: HashMap<String, (String, usize)> = HashMap::new();
for f in &functions {
let logical = f
.fqn
.split_once("::")
.map(|(_, l)| l)
.unwrap_or(f.fqn.as_str());
let (short, depth) = non_java_short_name_and_depth(file.language, logical);
name_to_fqn_depth
.entry(short)
.and_modify(|(existing_fqn, existing_depth)| {
if depth > *existing_depth {
*existing_fqn = f.fqn.clone();
*existing_depth = depth;
}
})
.or_insert_with(|| (f.fqn.clone(), depth));
}
let name_to_fqn: HashMap<String, String> = name_to_fqn_depth
.into_iter()
.map(|(k, (v, _))| (k, v))
.collect();
for func in &functions {
ir.functions.push(FunctionIr {
name: func.name.clone(),
fqn: func.fqn.clone(),
path: file_path.to_string(),
language: language.to_string(),
framework: None,
project_name: project_name.clone(),
arity: None,
return_type: func.return_type.clone(),
param_count: Some(func.param_count as u32),
param_types: func.param_types.clone(),
code_bytes: None,
});
push_declares_function(&mut ir.edges, "File", file_path, &func.fqn);
}
match file.language {
LanguageId::Python => {
for imp in extract_python_import_modules(&file.tree, source) {
if let Some(dep) = resolve_python_import_to_known_file(&imp, known_paths) {
push_depends_on_file(&mut ir.edges, file_path, &dep);
} else if should_log_unresolved_import(
options.verbose_imports,
is_python_stdlib_top_level(&imp),
is_python_common_external_top_level(&imp),
) {
eprintln!(
"Python import (unresolved to scanned files): `{imp}` in {file_path}"
);
}
}
for (caller, callee) in
extract_python_intrafile_calls(&file.tree, source, file_path, &name_to_fqn)
{
accumulator.calls_function.insert((caller, callee));
}
}
LanguageId::JavaScript | LanguageId::TypeScript | LanguageId::Tsx => {
for spec in extract_js_ts_import_specifiers(&file.tree, source) {
if let Some(dep) =
resolve_js_ts_import_to_known_file(&spec, file_path, known_paths)
{
push_depends_on_file(&mut ir.edges, file_path, &dep);
} else if options.verbose_imports {
eprintln!("JS/TS import (unresolved): `{spec}` in {file_path}");
}
}
for (caller, callee) in extract_js_ts_intrafile_calls(
&file.tree,
source,
file_path,
file.language,
&name_to_fqn,
) {
accumulator.calls_function.insert((caller, callee));
}
}
LanguageId::Rust => {
for use_path in extract_rust_use_paths(&file.tree, source) {
if let Some(dep) =
resolve_rust_use_to_known_file(&use_path, file_path, known_paths)
{
push_depends_on_file(&mut ir.edges, file_path, &dep);
} else if options.verbose_imports {
eprintln!(
"Rust use (unresolved to scanned files): `{}` in {file_path}",
use_path.join("::")
);
}
}
for (caller, callee) in
extract_rust_intrafile_calls(&file.tree, source, file_path, &name_to_fqn)
{
accumulator.calls_function.insert((caller, callee));
}
}
_ => {}
}
}
/// Links API endpoints to external APIs that share the same normalized path.
///
/// For every endpoint/external-API pair where the external API's `norm_path`
/// equals the endpoint's normalized path, a `SameApi` edge is appended. The
/// target key is `external_api_key(base_url, norm_path)` when both parts are
/// present, otherwise the external API's `name`.
fn apply_same_api_edges(ir: &mut ProjectIr) {
    use crate::ir::{api_endpoint_key, external_api_key};

    for endpoint in &ir.api_endpoints {
        let wanted = normalize_api_path(&endpoint.path);
        let matching = ir
            .external_apis
            .iter()
            .filter(|ext| ext.norm_path.as_deref() == Some(wanted.as_str()));
        for ext in matching {
            let target_key = match (&ext.base_url, &ext.norm_path) {
                (Some(base), Some(norm)) => external_api_key(base, norm),
                _ => ext.name.clone(),
            };
            ir.edges.push(EdgeIr {
                kind: EdgeKind::SameApi,
                from_label: "ApiEndpoint".into(),
                from_key: api_endpoint_key(&endpoint.methods, &endpoint.path),
                to_label: "ExternalApi".into(),
                to_key: target_key,
            });
        }
    }
}
/// Writes a [`ProjectIr`] to Neo4j.
///
/// Connects using `cfg`, optionally wipes the whole database (`clean`), then
/// MERGEs file, module, class and function nodes followed by every edge. On
/// an existing node, `code_bytes` (and a class's `kind`) are only overwritten
/// when the IR carries a value (`coalesce`). Edge endpoints are MERGEd too, so
/// labels that have no node pass here are created on demand with just their
/// key property.
///
/// Module endpoints are identified by `name` + `path` on both sides of an
/// edge, decoded from the composite key via `parse_module_key`, so edges
/// attach to the Module node written by the module pass.
///
/// # Errors
/// Returns the first connection or query error; statements that already ran
/// are not rolled back by this function.
pub async fn persist_project_ir_to_neo4j(
    cfg: &Neo4jConfig,
    ir: &ProjectIr,
    clean: bool,
) -> Result<(), GraphError> {
    // Builds the property map used to MERGE one edge endpoint, plus the
    // parameters it references. `role` ("from" / "to") prefixes the parameter
    // names so both endpoints can share a query.
    fn edge_endpoint_match(role: &str, label: &str, key: &str) -> (String, Vec<(String, String)>) {
        if label == "Module" {
            if let Some((name, path)) = parse_module_key(key) {
                return (
                    format!("name: ${role}_name, path: ${role}_path"),
                    vec![
                        (format!("{role}_name"), name),
                        (format!("{role}_path"), path),
                    ],
                );
            }
            // A key without `@` cannot be decoded; fall through and match on
            // the raw key rather than inventing an empty name/path.
        }
        (
            format!("{}: ${role}_key", stable_key_property(label)),
            vec![(format!("{role}_key"), key.to_string())],
        )
    }

    let graph = Graph::new(&cfg.uri, &cfg.user, &cfg.password).await?;
    if clean {
        // Removes every node and relationship in the database.
        graph.run(query("MATCH (n) DETACH DELETE n")).await?;
    }

    for f in &ir.files {
        let q = query(
            "
MERGE (n:File { path: $path })
SET n.language = $language, n.project_name = $project_name
",
        )
        .param("path", f.path.clone())
        .param("language", f.language.clone())
        .param("project_name", f.project_name.clone());
        graph.run(q).await?;
    }

    for m in &ir.modules {
        let q = query(
            "
MERGE (n:Module { name: $name, path: $path })
ON CREATE SET n.language = $language, n.project_name = $project_name,
n.code_bytes = $code_bytes
ON MATCH SET n.language = $language, n.project_name = $project_name,
n.code_bytes = coalesce($code_bytes, n.code_bytes)
",
        )
        .param("name", m.name.clone())
        .param("path", m.path.clone())
        .param("language", m.language.clone())
        .param("project_name", m.project_name.clone())
        .param(props::CODE_BYTES, m.code_bytes.clone());
        graph.run(q).await?;
    }

    for c in &ir.classes {
        let q = query(
            "
MERGE (n:Class { fqn: $fqn })
ON CREATE SET n.name = $name, n.path = $path, n.language = $language,
n.project_name = $project_name, n.kind = $kind,
n.code_bytes = $code_bytes
ON MATCH SET n.name = $name, n.path = $path, n.language = $language,
n.project_name = $project_name, n.kind = coalesce($kind, n.kind),
n.code_bytes = coalesce($code_bytes, n.code_bytes)
",
        )
        .param("fqn", c.fqn.clone())
        .param("name", c.name.clone())
        .param("path", c.path.clone())
        .param("language", c.language.clone())
        .param("project_name", c.project_name.clone())
        .param("kind", c.kind.clone())
        .param(props::CODE_BYTES, c.code_bytes.clone());
        graph.run(q).await?;
    }

    for f in &ir.functions {
        let q = query(
            "
MERGE (n:Function { fqn: $fqn })
ON CREATE SET n.name = $name, n.path = $path, n.language = $language,
n.project_name = $project_name, n.arity = $arity,
n.return_type = $return_type, n.param_count = $param_count,
n.code_bytes = $code_bytes
ON MATCH SET n.name = $name, n.path = $path, n.language = $language,
n.project_name = $project_name, n.arity = $arity,
n.return_type = $return_type, n.param_count = $param_count,
n.code_bytes = coalesce($code_bytes, n.code_bytes)
",
        )
        .param("fqn", f.fqn.clone())
        .param("name", f.name.clone())
        .param("path", f.path.clone())
        .param("language", f.language.clone())
        .param("project_name", f.project_name.clone())
        .param("arity", f.arity.map(|a| a as i64))
        .param("return_type", f.return_type.clone())
        .param("param_count", f.param_count.map(|c| c as i64))
        .param(props::CODE_BYTES, f.code_bytes.clone());
        graph.run(q).await?;
    }

    for edge in &ir.edges {
        let rel = edge.kind.to_rel_type().to_string();
        let (from_props, from_params) =
            edge_endpoint_match("from", &edge.from_label, &edge.from_key);
        let (to_props, to_params) = edge_endpoint_match("to", &edge.to_label, &edge.to_key);
        // Labels and the relationship type cannot be Cypher parameters, so
        // they are spliced into the statement; all keys travel as parameters.
        let cypher = format!(
            "
MERGE (a:{lbl_a} {{ {from_props} }})
MERGE (b:{lbl_b} {{ {to_props} }})
MERGE (a)-[:{rel}]->(b)
",
            lbl_a = edge.from_label,
            lbl_b = edge.to_label,
        );
        let mut q = query(&cypher);
        for (name, value) in from_params.into_iter().chain(to_params) {
            q = q.param(&name, value);
        }
        graph.run(q).await?;
    }
    Ok(())
}
/// Splits a composite module key of the form `name@path` at its first `@`.
///
/// Returns `(name, path)` as owned strings, or `None` when the key contains
/// no `@`. Any further `@` characters stay in the path part.
fn parse_module_key(key: &str) -> Option<(String, String)> {
    let at = key.find('@')?;
    let name = &key[..at];
    // `@` is a single byte, so the path starts right after it.
    let path = &key[at + 1..];
    Some((name.to_owned(), path.to_owned()))
}
/// Returns the property name that identifies a node with the given label when
/// edge endpoints are matched: `path` for files and API endpoints, `fqn` for
/// functions, classes, properties and callbacks, and `name` for everything
/// else (including unknown labels).
fn stable_key_property(label: &str) -> &'static str {
    match label {
        "File" | "ApiEndpoint" => "path",
        "Function" | "Class" | "Property" | "Callback" => "fqn",
        // Module, Behaviour, ExternalApi and any unrecognised label.
        _ => "name",
    }
}
#[cfg(test)] mod tests {
use super::*;
use crate::ir::{EdgeKind, ProjectIr};
use crate::scanner::ParsedFile;
use crate::{parse_once, LanguageId};
use std::collections::HashSet;
use std::path::{Path, PathBuf};
// A C# class with one auto-property must yield the class `Ns.C`, the property
// `Ns.C.Name`, and DeclaresClass / DeclaresProperty edges targeting them.
#[test]
fn append_csharp_structural_ir_populates_classes_properties_and_edges() {
let src = r#"
namespace Ns {
public class C {
public string Name { get; set; }
}
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let mut ir = ProjectIr::empty();
append_csharp_structural_ir(&mut ir, "/src/C.cs", None, &tree, src);
assert!(ir.classes.iter().any(|c| c.fqn == "Ns.C"));
assert!(ir.properties.iter().any(|p| p.fqn == "Ns.C.Name"));
assert!(
ir.edges
.iter()
.any(|e| e.kind == EdgeKind::DeclaresClass && e.to_key == "Ns.C")
);
assert!(ir
.edges
.iter()
.any(|e| e.kind == EdgeKind::DeclaresProperty && e.to_key == "Ns.C.Name"));
}
// The using summary must keep the project namespace `OtherNs`, contain no
// import starting with `System` or `Microsoft`, and map the alias
// `AliasType` to `OtherNs.SomeType`.
#[test]
fn csharp_using_summary_parses_ast_and_filters_system_microsoft() {
let src = r#"
using System;
using System.Collections.Generic;
using Microsoft.Extensions.Logging;
using OtherNs;
using static System.Math;
using AliasType = OtherNs.SomeType;
namespace ConsumerNs { class C { void M() { } } }
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let sum = extract_csharp_using_summary(&tree, src);
assert!(sum.namespace_imports.contains(&"OtherNs".to_string()));
assert!(!sum
.namespace_imports
.iter()
.any(|n| n.starts_with("System") || n.starts_with("Microsoft")));
assert_eq!(
sum.alias_map.get("AliasType").map(String::as_str),
Some("OtherNs.SomeType")
);
}
// A `global using` directive must show up in `namespace_imports`.
#[test]
fn csharp_global_using_included_in_summary() {
let src = r#"
global using SharedNs;
namespace N { class C { } }
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let sum = extract_csharp_using_summary(&tree, src);
assert!(sum.namespace_imports.contains(&"SharedNs".to_string()));
}
// An `[HttpGet(...)]` attribute whose argument sits on the next line must
// still produce exactly one GET endpoint `/x` handled by `GetIt`.
#[test]
fn csharp_api_endpoints_multiline_http_get_attribute() {
let src = r#"
namespace N {
public class C {
[HttpGet(
"/x")]
public void GetIt() { }
}
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
assert_eq!(eps.len(), 1);
assert_eq!(eps[0].0, vec!["GET"]);
assert_eq!(eps[0].1, "/x");
assert_eq!(eps[0].2, "GetIt");
}
// A class-level `[Route("api/v1")]` combined with a method-level
// `[HttpGet("orders")]` must produce the single path `/api/v1/orders`.
#[test]
fn csharp_api_endpoints_class_route_plus_method_http_get() {
let src = r#"
namespace N {
[Route("api/v1")]
public class OrdersController {
[HttpGet("orders")]
public void Get() { }
}
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
assert_eq!(eps.len(), 1);
assert_eq!(eps[0].0, vec!["GET"]);
assert_eq!(eps[0].1, "/api/v1/orders");
assert_eq!(eps[0].2, "Get");
}
// Regression guard: the plain single-line `[HttpGet("/api/orders")]` form
// must yield one endpoint with that path, handled by `List`.
#[test]
fn csharp_api_endpoints_single_line_http_get_regression() {
let src = r#"
namespace N {
public class C {
[HttpGet("/api/orders")]
public void List() { }
}
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
assert_eq!(eps.len(), 1);
assert_eq!(eps[0].1, "/api/orders");
assert_eq!(eps[0].2, "List");
}
// An attribute that only appears inside a `//` comment must be ignored: the
// single endpoint found is the real `/real` one.
#[test]
fn csharp_api_endpoints_comment_with_fake_attribute_not_parsed() {
let src = r#"
namespace N {
public class C {
// [HttpGet("/fake")]
[HttpGet("/real")]
public void A() { }
}
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
assert_eq!(eps.len(), 1);
assert_eq!(eps[0].1, "/real");
}
// When the only attribute is commented out, no endpoint may be reported.
#[test]
fn csharp_api_endpoints_comment_only_does_not_create_endpoint() {
let src = r#"
namespace N {
public class C {
// [HttpGet("/fake")]
public void M() { }
}
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let eps = extract_csharp_api_endpoints_from_tree(&tree, src);
assert!(eps.is_empty(), "expected no endpoints, got {eps:?}");
}
// Nested classes get dotted FQNs (`N.Outer`, `N.Outer.Inner`) and a method of
// the inner class is attributed to `N.Outer.Inner`.
#[test]
fn csharp_symbols_nested_class_fqns() {
let src = r#"
namespace N {
public class Outer {
public class Inner {
public void M() { }
}
}
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let (classes, methods, _) = extract_csharp_symbols(&tree, src);
let fqns: Vec<&str> = classes.iter().map(|c| c.fqn.as_str()).collect();
assert!(fqns.contains(&"N.Outer"));
assert!(fqns.contains(&"N.Outer.Inner"));
let m = methods.iter().find(|f| f.name == "M").expect("method M");
assert_eq!(m.class_fqn.as_deref(), Some("N.Outer.Inner"));
assert_eq!(m.fqn, "N.Outer.Inner.M");
}
// An enum is reported among the class symbols with kind `enum` and a
// namespace-qualified FQN.
#[test]
fn csharp_symbols_enum_kind_and_fqn() {
let src = r#"
namespace N {
public enum Color { Red, Green }
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let (classes, _, _) = extract_csharp_symbols(&tree, src);
let e = classes.iter().find(|c| c.name == "Color").expect("enum Color");
assert_eq!(e.fqn, "N.Color");
assert_eq!(e.kind, Some("enum"));
}
// A positional record is reported among the class symbols with kind `record`
// and a namespace-qualified FQN.
#[test]
fn csharp_symbols_record_kind_and_fqn() {
let src = r#"
namespace N {
public record Person(string Name);
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let (classes, _, _) = extract_csharp_symbols(&tree, src);
let r = classes.iter().find(|c| c.name == "Person").expect("record Person");
assert_eq!(r.fqn, "N.Person");
assert_eq!(r.kind, Some("record"));
}
// Nested namespace blocks are concatenated in FQNs: class `A.B.C` and method
// `A.B.C.M`.
#[test]
fn csharp_namespace_nested_blocks_in_fqn() {
let src = r#"
namespace A {
namespace B {
public class C {
public void M() {}
}
}
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let (classes, methods, _) = extract_csharp_symbols(&tree, src);
let c = classes.iter().find(|x| x.name == "C").expect("class C");
assert_eq!(c.fqn, "A.B.C");
let m = methods.iter().find(|f| f.name == "M").expect("method M");
assert_eq!(m.fqn, "A.B.C.M");
}
// A file-scoped namespace (`namespace Ns;`) is returned by
// `extract_csharp_namespace` and prefixes the FQN of classes in the file.
#[test]
fn csharp_namespace_file_scoped_extracts() {
let src = "namespace Ns;\npublic class X { }\n";
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
assert_eq!(
extract_csharp_namespace(&tree, src).as_deref(),
Some("Ns")
);
let (classes, _, _) = extract_csharp_symbols(&tree, src);
let x = classes.iter().find(|c| c.name == "X").expect("class X");
assert_eq!(x.fqn, "Ns.X");
}
// Two sibling top-level namespaces in one file are both collected; the result
// is sorted before comparing so the returned order does not matter.
#[test]
fn csharp_collect_file_namespace_strings_two_roots() {
let src = r#"
namespace A { public class Ca { } }
namespace B { public class Cb { } }
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let mut ns = collect_csharp_file_namespace_strings(&tree, src);
ns.sort();
assert_eq!(ns, vec!["A".to_string(), "B".to_string()]);
}
// With base and derived classes in two separate files of the same namespace,
// the inheritance edge extracted from the derived file must resolve to the
// pair (`N.Derived`, `N.Base`) through the batch index built over both files.
#[test]
fn csharp_class_inheritance_edges_resolved() {
let src_base = "namespace N { public class Base { } }";
let src_der = "namespace N { public class Derived : Base { } }";
let tree_b = parse_once(LanguageId::CSharp, src_base).expect("parse");
let tree_d = parse_once(LanguageId::CSharp, src_der).expect("parse");
let files = vec![
ParsedFile {
path: PathBuf::from("/b/Base.cs"),
language: LanguageId::CSharp,
tree: tree_b,
source: src_base.to_string(),
is_test: false,
},
ParsedFile {
path: PathBuf::from("/d/Derived.cs"),
language: LanguageId::CSharp,
tree: tree_d,
source: src_der.to_string(),
is_test: false,
},
];
let index = build_csharp_batch_index(&files, Path::new("."));
// files[1] is the derived class; its edges are what is under test.
let using = extract_csharp_using_summary(&files[1].tree, &files[1].source);
let edges = extract_csharp_class_inheritance_edges(
&files[1].tree,
&files[1].source,
Some("N"),
&using,
&index,
);
assert!(
edges.contains(&(String::from("N.Derived"), String::from("N.Base"))),
"edges={edges:?}"
);
}
// The constructor symbol is named `ctor#0` with FQN `Ns.C.ctor#0`, and the
// call graph must use that same FQN as the caller of `Ns.C.M`, so symbol and
// call-edge keys agree.
#[test]
fn csharp_symbols_constructor_function_matches_call_graph_fqn() {
let src = r#"
namespace Ns {
public class C {
public C() { M(); }
public void M() { }
}
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let files = vec![ParsedFile {
path: PathBuf::from("/t/C.cs"),
language: LanguageId::CSharp,
tree,
source: src.to_string(),
is_test: false,
}];
let index = build_csharp_batch_index(&files, Path::new("."));
let using_summary = extract_csharp_using_summary(&files[0].tree, &files[0].source);
let (_, methods, _) = extract_csharp_symbols(&files[0].tree, &files[0].source);
let ctor = methods
.iter()
.find(|f| f.fqn == "Ns.C.ctor#0")
.expect("ctor#0 symbol");
assert_eq!(ctor.name, "ctor#0");
assert_eq!(ctor.class_fqn.as_deref(), Some("Ns.C"));
let calls = extract_csharp_calls(
&files[0].tree,
&files[0].source,
Some("Ns"),
&using_summary,
&index,
);
assert!(
calls.iter().any(|(a, b)| a == "Ns.C.ctor#0" && b == "Ns.C.M"),
"expected persisted ctor fqn as caller, got {calls:?}"
);
}
// An auto-property yields a property symbol (`Ns.C.Name`, declared type
// `string`) plus accessor functions `get_Name` and `set_Name`.
#[test]
fn csharp_symbols_property_declares_accessors_and_property_symbol() {
let src = r#"
namespace Ns {
public class C {
public string Name { get; set; }
}
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let (_, methods, properties) = extract_csharp_symbols(&tree, src);
let pname = properties.iter().find(|p| p.name == "Name").expect("property Name");
assert_eq!(pname.fqn, "Ns.C.Name");
assert_eq!(pname.class_fqn, "Ns.C");
assert!(pname.declared_type.as_deref() == Some("string"));
let get_f = methods
.iter()
.find(|f| f.fqn == "Ns.C.get_Name")
.expect("getter function");
assert_eq!(get_f.name, "get_Name");
let set_f = methods
.iter()
.find(|f| f.fqn == "Ns.C.set_Name")
.expect("setter function");
assert_eq!(set_f.name, "set_Name");
}
// Method metadata: modifiers (`public`, `static`, `async`), parameter count
// and types, and a return type that contains `Task<int>`.
#[test]
fn csharp_symbols_method_modifiers_params_return_type() {
let src = r#"
namespace N {
public class Api {
public static async System.Threading.Tasks.Task<int> Foo(string s, OrderDto o) { return 0; }
}
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let (_, methods, _) = extract_csharp_symbols(&tree, src);
let f = methods.iter().find(|m| m.name == "Foo").expect("Foo");
assert!(f.modifiers.contains(&"public".to_string()));
assert!(f.modifiers.contains(&"static".to_string()));
assert!(f.modifiers.contains(&"async".to_string()));
assert_eq!(f.param_count, 2);
assert_eq!(f.param_types, vec!["string", "OrderDto"]);
// Only a substring match: the return type may or may not keep its qualifier.
assert!(f.return_type.as_deref().unwrap_or("").contains("Task<int>"));
}
// Body spans are keyed by the full nested FQN (`N.Outer.Inner.M`), the same
// form the symbol extractor is asserted to produce for nested classes.
#[test]
fn csharp_method_body_spans_nested_method_fqn() {
let src = r#"
namespace N {
public class Outer {
public class Inner {
public void M() { int x = 1; }
}
}
}
"#;
let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
let spans = csharp_method_body_spans(&tree, src, Some("N"));
let m = spans.iter().find(|(fqn, _, _)| fqn.ends_with(".M")).expect("span M");
assert_eq!(m.0, "N.Outer.Inner.M");
}
// A local variable typed `RemoteDto` in the consumer file must resolve to
// `OtherNs.RemoteDto`, using the `using OtherNs;` directive and the batch
// index built over both files.
#[test]
fn csharp_uses_class_resolves_type_via_using_and_batch_index() {
let src_other = "namespace OtherNs { public class RemoteDto { } }\n";
let src_consumer = r#"
using OtherNs;
namespace ConsumerNs {
public class Consumer {
public void M() {
RemoteDto x;
}
}
}
"#;
let tree_other = parse_once(LanguageId::CSharp, src_other).expect("parse");
let tree_consumer = parse_once(LanguageId::CSharp, src_consumer).expect("parse");
let files = vec![
ParsedFile {
path: PathBuf::from("/repo/OtherNs/RemoteDto.cs"),
language: LanguageId::CSharp,
tree: tree_other,
source: src_other.to_string(),
is_test: false,
},
ParsedFile {
path: PathBuf::from("/repo/Consumer.cs"),
language: LanguageId::CSharp,
tree: tree_consumer,
source: src_consumer.to_string(),
is_test: false,
},
];
let index = build_csharp_batch_index(&files, Path::new("."));
// files[1] is the consumer; its used classes are what is under test.
let using_summary = extract_csharp_using_summary(&files[1].tree, &files[1].source);
let uses = extract_csharp_used_classes(
&files[1].tree,
&files[1].source,
Some("ConsumerNs"),
&using_summary,
&index,
);
assert!(
uses.iter().any(|(_, cls)| cls == "OtherNs.RemoteDto"),
"expected USES_CLASS to OtherNs.RemoteDto, got {uses:?}"
);
}
/// A `using R = OtherNs.RemoteDto;` alias must resolve a use of `R` to the
/// aliased class name `OtherNs.RemoteDto`.
#[test]
fn csharp_type_alias_using_resolves_to_aliased_class_fqn() {
    let src_other = "namespace OtherNs { public class RemoteDto { } }\n";
    let src_consumer = r#"
using R = OtherNs.RemoteDto;
namespace ConsumerNs {
public class Consumer {
public void M() {
R x;
}
}
}
"#;
    let tree_other = parse_once(LanguageId::CSharp, src_other).expect("parse");
    let tree_consumer = parse_once(LanguageId::CSharp, src_consumer).expect("parse");
    let files = vec![
        ParsedFile {
            path: PathBuf::from("/p/Other.cs"),
            language: LanguageId::CSharp,
            tree: tree_other,
            source: src_other.to_string(),
            is_test: false,
        },
        ParsedFile {
            path: PathBuf::from("/p/Consumer.cs"),
            language: LanguageId::CSharp,
            tree: tree_consumer,
            source: src_consumer.to_string(),
            is_test: false,
        },
    ];
    let index = build_csharp_batch_index(&files, Path::new("."));
    let using_summary = extract_csharp_using_summary(&files[1].tree, &files[1].source);
    let uses = extract_csharp_used_classes(
        &files[1].tree,
        &files[1].source,
        Some("ConsumerNs"),
        &using_summary,
        &index,
    );
    // Print the extracted pairs on failure, as the other USES_CLASS tests do,
    // so a regression can be diagnosed from the test log alone.
    assert!(
        uses.iter().any(|(_, cls)| cls == "OtherNs.RemoteDto"),
        "expected alias R to resolve to OtherNs.RemoteDto, got {uses:?}"
    );
}
/// Calling `GetOrder()` must not produce a class use pointing at a type
/// named after the invoked method.
#[test]
fn csharp_uses_class_does_not_use_method_name_as_type_from_invocation() {
    let src = r#"
namespace Ns {
public class C {
public void GetOrder() { }
public void M() {
var x = GetOrder();
}
}
}
"#;
    let only_file = ParsedFile {
        path: PathBuf::from("/t/C.cs"),
        language: LanguageId::CSharp,
        tree: parse_once(LanguageId::CSharp, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let files = vec![only_file];
    let index = build_csharp_batch_index(&files, Path::new("."));
    let subject = &files[0];
    let using_summary = extract_csharp_using_summary(&subject.tree, &subject.source);
    let uses = extract_csharp_used_classes(
        &subject.tree,
        &subject.source,
        Some("Ns"),
        &using_summary,
        &index,
    );
    assert!(
        uses.iter().all(|(_, used)| used != "Ns.GetOrder"),
        "GetOrder() invocation must not create USES_CLASS to a synthetic type, got {uses:?}"
    );
}
/// A `System.DateTime` parameter must not show up among the extracted class uses.
#[test]
fn csharp_uses_class_skips_bcl_datetime_in_parameter() {
    let src = r#"
namespace Ns {
public class C {
public void M(System.DateTime d) { }
}
}
"#;
    let only_file = ParsedFile {
        path: PathBuf::from("/t/C.cs"),
        language: LanguageId::CSharp,
        tree: parse_once(LanguageId::CSharp, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let files = vec![only_file];
    let index = build_csharp_batch_index(&files, Path::new("."));
    let subject = &files[0];
    let using_summary = extract_csharp_using_summary(&subject.tree, &subject.source);
    let uses = extract_csharp_used_classes(
        &subject.tree,
        &subject.source,
        Some("Ns"),
        &using_summary,
        &index,
    );
    assert!(
        uses.iter().all(|(_, used)| !used.contains("DateTime")),
        "DateTime parameter should be filtered as BCL noise, got {uses:?}"
    );
}
/// A type referenced only inside a local function must not be attributed to
/// the enclosing method `Ns.Outer.M`.
#[test]
fn csharp_uses_class_does_not_include_types_only_used_in_local_function() {
    let src = r#"
namespace Ns {
public class Outer {
public void M() {
void Local() {
InnerOnly x;
}
}
}
public class InnerOnly { }
}
"#;
    let only_file = ParsedFile {
        path: PathBuf::from("/t/Types.cs"),
        language: LanguageId::CSharp,
        tree: parse_once(LanguageId::CSharp, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let files = vec![only_file];
    let index = build_csharp_batch_index(&files, Path::new("."));
    let subject = &files[0];
    let using_summary = extract_csharp_using_summary(&subject.tree, &subject.source);
    let uses = extract_csharp_used_classes(
        &subject.tree,
        &subject.source,
        Some("Ns"),
        &using_summary,
        &index,
    );
    // Every pair must either come from a different caller or not mention InnerOnly.
    assert!(
        uses.iter()
            .all(|(caller, used)| caller != "Ns.Outer.M" || !used.contains("InnerOnly")),
        "InnerOnly only appears inside local function; outer M must not USES_CLASS it, got {uses:?}"
    );
}
/// A call through a field (`_repo.Get(1)`) must resolve the callee to the
/// field's declared type: `OtherNs.OrderRepo.Get`.
#[test]
fn csharp_calls_resolve_field_receiver_to_type_fqn() {
    let src_repo =
        "namespace OtherNs { public class OrderRepo { public void Get(int id) { } } }\n";
    let src_svc = r#"
using OtherNs;
namespace ConsumerNs {
public class Svc {
private OrderRepo _repo;
public void M() { _repo.Get(1); }
}
}
"#;
    let repo_file = ParsedFile {
        path: PathBuf::from("/r/OrderRepo.cs"),
        language: LanguageId::CSharp,
        tree: parse_once(LanguageId::CSharp, src_repo).expect("parse"),
        source: src_repo.to_string(),
        is_test: false,
    };
    let svc_file = ParsedFile {
        path: PathBuf::from("/r/Svc.cs"),
        language: LanguageId::CSharp,
        tree: parse_once(LanguageId::CSharp, src_svc).expect("parse"),
        source: src_svc.to_string(),
        is_test: false,
    };
    let files = vec![repo_file, svc_file];
    let index = build_csharp_batch_index(&files, Path::new("."));
    let service = &files[1];
    let using_summary = extract_csharp_using_summary(&service.tree, &service.source);
    let calls = extract_csharp_calls(
        &service.tree,
        &service.source,
        Some("ConsumerNs"),
        &using_summary,
        &index,
    );
    assert!(
        calls.iter().any(|(_, callee)| callee == "OtherNs.OrderRepo.Get"),
        "expected callee OtherNs.OrderRepo.Get, got {calls:?}"
    );
}
/// `this.Helper()` must resolve to the method on the enclosing class, `Ns.C.Helper`.
#[test]
fn csharp_calls_this_receiver_resolves_to_class_method() {
    let src = r#"
namespace Ns {
public class C {
void Helper() { }
public void Run() { this.Helper(); }
}
}
"#;
    let only_file = ParsedFile {
        path: PathBuf::from("/t/C.cs"),
        language: LanguageId::CSharp,
        tree: parse_once(LanguageId::CSharp, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let files = vec![only_file];
    let index = build_csharp_batch_index(&files, Path::new("."));
    let subject = &files[0];
    let using_summary = extract_csharp_using_summary(&subject.tree, &subject.source);
    let calls = extract_csharp_calls(
        &subject.tree,
        &subject.source,
        Some("Ns"),
        &using_summary,
        &index,
    );
    assert!(
        calls.iter().any(|(_, callee)| callee == "Ns.C.Helper"),
        "expected callee Ns.C.Helper, got {calls:?}"
    );
}
/// A call made inside a constructor body must be attributed to the caller
/// key `Ns.C.ctor#0`.
#[test]
fn csharp_calls_constructor_invocations_extracted() {
    let src = r#"
namespace Ns {
public class C {
public C() { M(); }
public void M() { }
}
}
"#;
    let only_file = ParsedFile {
        path: PathBuf::from("/t/C.cs"),
        language: LanguageId::CSharp,
        tree: parse_once(LanguageId::CSharp, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let files = vec![only_file];
    let index = build_csharp_batch_index(&files, Path::new("."));
    let subject = &files[0];
    let using_summary = extract_csharp_using_summary(&subject.tree, &subject.source);
    let calls = extract_csharp_calls(
        &subject.tree,
        &subject.source,
        Some("Ns"),
        &using_summary,
        &index,
    );
    assert!(
        calls
            .iter()
            .any(|(caller, callee)| caller == "Ns.C.ctor#0" && callee == "Ns.C.M"),
        "expected ctor caller Ns.C.ctor#0 -> Ns.C.M, got {calls:?}"
    );
}
/// A call made inside a property getter must be attributed to the caller
/// key `Ns.C.get_Prop`.
#[test]
fn csharp_calls_property_getter_invocations_extracted() {
    let src = r#"
namespace Ns {
public class C {
public int Prop {
get { Helper(); return 1; }
}
void Helper() { }
}
}
"#;
    let only_file = ParsedFile {
        path: PathBuf::from("/t/C.cs"),
        language: LanguageId::CSharp,
        tree: parse_once(LanguageId::CSharp, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let files = vec![only_file];
    let index = build_csharp_batch_index(&files, Path::new("."));
    let subject = &files[0];
    let using_summary = extract_csharp_using_summary(&subject.tree, &subject.source);
    let calls = extract_csharp_calls(
        &subject.tree,
        &subject.source,
        Some("Ns"),
        &using_summary,
        &index,
    );
    assert!(
        calls
            .iter()
            .any(|(caller, callee)| caller == "Ns.C.get_Prop" && callee == "Ns.C.Helper"),
        "expected getter caller Ns.C.get_Prop -> Ns.C.Helper, got {calls:?}"
    );
}
/// `Console.WriteLine(...)` with no `using System;` in the file must still
/// yield the callee `System.Console.WriteLine`.
#[test]
fn csharp_calls_console_writeline_uses_well_known_type() {
    let src = r#"
namespace Ns {
public class C {
public void M() { Console.WriteLine("x"); }
}
}
"#;
    let only_file = ParsedFile {
        path: PathBuf::from("/t/C.cs"),
        language: LanguageId::CSharp,
        tree: parse_once(LanguageId::CSharp, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let files = vec![only_file];
    let index = build_csharp_batch_index(&files, Path::new("."));
    let subject = &files[0];
    let using_summary = extract_csharp_using_summary(&subject.tree, &subject.source);
    let calls = extract_csharp_calls(
        &subject.tree,
        &subject.source,
        Some("Ns"),
        &using_summary,
        &index,
    );
    assert!(
        calls
            .iter()
            .any(|(_, callee)| callee == "System.Console.WriteLine"),
        "expected System.Console.WriteLine, got {calls:?}"
    );
}
/// URLs in line and block comments must be ignored; only the URL inside the
/// string literal may be returned.
#[test]
fn csharp_external_http_urls_ignore_comments_not_string_literals() {
    let src = r#"
namespace Ns {
class C {
// https://evil-line.example/x
void M() {
/* https://evil-block.example/y */
var x = "https://good.example/only";
}
}
}
"#;
    let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
    let urls = extract_csharp_external_http_urls_with_spans(&tree, src);
    assert_eq!(urls.len(), 1, "expected one URL from string literal, got {urls:?}");

    // The length check above guarantees index 0 exists.
    let (only_url, _, _) = &urls[0];
    assert!(
        only_url.contains("good.example"),
        "unexpected url {:?}",
        only_url
    );
    assert!(
        urls.iter().all(|(url, _, _)| !url.contains("evil")),
        "comment URLs must not appear: {urls:?}"
    );
}
/// Each URL literal must be linked only to the method whose body span
/// contains it: no cross-linking of every URL to every method.
#[test]
fn csharp_external_api_links_url_only_to_methods_whose_body_contains_literal() {
    let src = r#"
namespace Ns {
public class C {
void A() { var x = "https://api-a.example/v1"; }
void B() { var y = "https://api-b.example/v2"; }
}
}
"#;
    let tree = parse_once(LanguageId::CSharp, src).expect("parse C#");
    let (_, methods, _) = extract_csharp_symbols(&tree, src);
    let url_spans = extract_csharp_external_http_urls_with_spans(&tree, src);
    let method_spans = csharp_method_body_spans(&tree, src, Some("Ns"));

    // Group body ranges by method FQN; one FQN may own several ranges.
    let mut spans_by_fqn: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
    method_spans
        .into_iter()
        .for_each(|(fqn, lo, hi)| spans_by_fqn.entry(fqn).or_default().push((lo, hi)));

    // Pair a method with a URL only when one of the method's ranges fully
    // encloses the URL literal's span.
    let mut pairs: Vec<(String, String)> = Vec::new();
    for (full_url, u_start, u_end) in &url_spans {
        let enclosing_methods = methods.iter().filter(|func| {
            spans_by_fqn.get(&func.fqn).map_or(false, |ranges| {
                ranges
                    .iter()
                    .any(|(lo, hi)| *lo <= *u_start && *u_end <= *hi)
            })
        });
        pairs.extend(enclosing_methods.map(|func| (func.fqn.clone(), full_url.clone())));
    }
    pairs.sort();

    let link = |method: &str, url: &str| (method.to_string(), url.to_string());
    assert!(
        pairs.contains(&link("Ns.C.A", "https://api-a.example/v1")),
        "missing A->api-a, got {pairs:?}"
    );
    assert!(
        pairs.contains(&link("Ns.C.B", "https://api-b.example/v2")),
        "missing B->api-b, got {pairs:?}"
    );
    assert!(
        !pairs.contains(&link("Ns.C.A", "https://api-b.example/v2")),
        "N×M leak: A linked to B's URL: {pairs:?}"
    );
    assert!(
        !pairs.contains(&link("Ns.C.B", "https://api-a.example/v1")),
        "N×M leak: B linked to A's URL: {pairs:?}"
    );
}
/// The `-module(...)` attribute must win over a file basename that does not match.
#[test]
fn resolves_module_name_from_erl_attribute() {
    let source = "-module(real_mod).\nfoo() -> ok.\n";
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let mismatching_path = Path::new("/tmp/not_matching.erl");
    let module_name = resolve_erlang_module_name(mismatching_path, &tree, source);
    assert_eq!(module_name.as_deref(), Some("real_mod"));
}
/// With no `-module(...)` attribute, an `.erl` file's basename is used as
/// the module name.
#[test]
fn resolves_module_name_from_erl_basename_fallback() {
    let source = "foo() -> ok.\n";
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let erl_path = Path::new("/tmp/fallback_name.erl");
    let module_name = resolve_erlang_module_name(erl_path, &tree, source);
    assert_eq!(module_name.as_deref(), Some("fallback_name"));
}
/// An `.hrl` file with no `-module(...)` attribute must resolve to no module
/// name at all.
#[test]
fn does_not_fallback_module_name_for_hrl() {
    let source = "-define(FLAG, true).\n";
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let header_path = Path::new("/tmp/records.hrl");
    let module_name = resolve_erlang_module_name(header_path, &tree, source);
    assert_eq!(module_name, None);
}
/// Both the `-behaviour(...)` and the `-behavior(...)` spelling must be
/// collected as behaviour usages.
#[test]
fn extracts_erlang_behaviour_usages_from_ast() {
    let source = r#"
-behaviour(gen_server).
-behavior(custom_behaviour).
foo() -> ok.
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let metadata = extract_erlang_behaviour_metadata_from_tree(&tree, source);
    let usages = &metadata.behaviour_usages;
    assert!(usages.contains("gen_server"));
    assert!(usages.contains("custom_behaviour"));
}
/// `-callback` attributes populate the declared callbacks;
/// `-optional_callbacks` marks only the signatures it lists.
#[test]
fn extracts_declared_and_optional_callbacks_from_ast() {
    let source = r#"
-callback init(term()) -> {ok, state()}.
-callback handle_call(term(), term(), term()) -> {reply, ok, term()}.
-optional_callbacks([handle_call/3]).
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let metadata = extract_erlang_behaviour_metadata_from_tree(&tree, source);

    // Both callbacks are declared.
    assert!(metadata.declared_callbacks.contains(&("init".to_string(), 1)));
    assert!(metadata
        .declared_callbacks
        .contains(&("handle_call".to_string(), 3)));

    // Only handle_call/3 is optional.
    assert!(metadata
        .optional_callbacks
        .contains(&("handle_call".to_string(), 3)));
    assert!(!metadata.optional_callbacks.contains(&("init".to_string(), 1)));
}
/// The custom `-extends_behaviour` and `-override_callback` attributes must
/// be surfaced in the extracted metadata.
#[test]
fn extracts_behaviour_extension_and_override_hints_from_ast() {
    let source = r#"
-extends_behaviour(base_handler).
-override_callback(handle_call/3).
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let metadata = extract_erlang_behaviour_metadata_from_tree(&tree, source);
    assert!(metadata.behaviour_extensions.contains("base_handler"));
    assert!(metadata
        .overridden_callbacks
        .contains(&("handle_call".to_string(), 3)));
}
/// Quoted atoms and attributes spread over several lines must extract the
/// same as their single-line, unquoted forms.
#[test]
fn ast_extracts_multiline_and_quoted_attributes() {
    let source = r#"
-'behaviour'('gen_server').
-callback
'handle_call'(
term(),
term(),
term()
) ->
{reply, ok, term()}.
-optional_callbacks([
'handle_call'/3
]).
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let metadata = extract_erlang_behaviour_metadata_from_tree(&tree, source);

    // The names are expected without their surrounding quotes.
    assert!(metadata.behaviour_usages.contains("gen_server"));
    assert!(metadata
        .declared_callbacks
        .contains(&("handle_call".to_string(), 3)));
    assert!(metadata
        .optional_callbacks
        .contains(&("handle_call".to_string(), 3)));
}
/// Attribute arguments written as macros (`?X`) or variables (`ParentVar`)
/// must contribute nothing to any metadata set.
#[test]
fn ast_skips_macro_or_variable_names_for_safety() {
    let source = r#"
-behaviour(?DYN_BEHAVIOUR).
-optional_callbacks([?CALLBACK/2]).
-extends_behaviour(ParentVar).
-override_callback(?OVERRIDE/3).
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let metadata = extract_erlang_behaviour_metadata_from_tree(&tree, source);
    assert!(metadata.behaviour_usages.is_empty());
    assert!(metadata.optional_callbacks.is_empty());
    assert!(metadata.behaviour_extensions.is_empty());
    assert!(metadata.overridden_callbacks.is_empty());
}
/// The collected contracts must include `gen_server`'s `handle_call/3` and
/// the locally declared optional `local_cb/1` under the module's own name.
#[test]
fn maps_functions_to_callback_contracts() {
    let source = r#"
-behaviour(gen_server).
-callback local_cb(term()) -> ok.
-optional_callbacks([local_cb/1]).
init(Args) -> {ok, Args}.
handle_call(_Req, _From, State) -> {reply, ok, State}.
local_cb(X) -> X.
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let metadata = extract_erlang_behaviour_metadata_from_tree(&tree, source);
    let contracts = collect_callback_contracts_for_module(
        Some("my_behaviour"),
        &metadata.behaviour_usages,
        &metadata.declared_callbacks,
        &metadata.optional_callbacks,
    );

    let has_gen_server_handle_call = contracts.iter().any(|contract| {
        contract.behaviour == "gen_server" && contract.name == "handle_call" && contract.arity == 3
    });
    assert!(has_gen_server_handle_call);

    let has_optional_local_cb = contracts.iter().any(|contract| {
        contract.behaviour == "my_behaviour"
            && contract.name == "local_cb"
            && contract.arity == 1
            && contract.optional
    });
    assert!(has_optional_local_cb);
}
/// A quoted function name with a multi-line argument list and a plain
/// function must both be extracted with the correct arity.
#[test]
fn extracts_erlang_functions_from_ast_multiline_and_quoted() {
    let source = r#"
-module(my_handler).
'special_name'(
Req,
State
) ->
{ok, State}.
websocket_handle(Frame, State) ->
{ok, State}.
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let functions = extract_erlang_functions("my_handler", &tree, source);

    // Reduce each entry to its (name, arity) signature for set lookup.
    let mut signatures: HashSet<(String, u32)> = HashSet::new();
    for (name, arity, _) in functions.iter() {
        signatures.insert((name.clone(), *arity));
    }
    assert!(signatures.contains(&("special_name".to_string(), 2)));
    assert!(signatures.contains(&("websocket_handle".to_string(), 2)));
}
/// Function spans must be retrievable under the `module:name/arity` key, and
/// the span must cover the function head.
#[test]
fn erlang_function_spans_keyed_by_fqn() {
    let source = r#"
-module(m).
handle(Req, State) ->
{ok, State}.
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("parse");
    let spans = extract_erlang_function_spans("m", &tree, source);
    let (start, end) = spans
        .get("m:handle/2")
        .copied()
        .expect("span for handle/2");
    let covered = &source[start..end];
    assert!(covered.contains("handle(Req, State)"));
}
/// The span stored for `com.example.A.m` must cover the text of the method body.
#[test]
fn java_method_body_spans_include_method_body() {
    let source = r#"
package com.example;
class A {
void m() { return; }
}
"#;
    let tree = parse_once(LanguageId::Java, source).expect("parse");
    let spans = extract_java_method_body_spans(&tree, source, Some("com.example"));
    let (start, end) = spans.get("com.example.A.m").copied().expect("span");
    let covered = &source[start..end];
    assert!(covered.contains("return"));
}
/// Only signatures that are both implemented and declared as callbacks are
/// selected, and they come back in the asserted order.
#[test]
fn selects_endpoint_handler_callbacks_from_contracts_and_implemented_signatures() {
    // `not_a_callback/1` is implemented but not a callback;
    // `websocket_info/2` is a callback but not implemented.
    let snapshot = ErlangModuleSnapshot {
        implemented_signatures: HashSet::from([
            ("init".to_string(), 2),
            ("websocket_handle".to_string(), 2),
            ("not_a_callback".to_string(), 1),
        ]),
        callback_signatures: HashSet::from([
            ("init".to_string(), 2),
            ("websocket_handle".to_string(), 2),
            ("websocket_info".to_string(), 2),
        ]),
    };
    let mut idx = HashMap::new();
    idx.insert("omega_ws_handler".to_string(), snapshot);

    let fqns = select_endpoint_handler_fqns("omega_ws_handler", &idx);
    let expected = vec![
        "omega_ws_handler:init/2".to_string(),
        "omega_ws_handler:websocket_handle/2".to_string(),
    ];
    assert_eq!(fqns, expected);
}
/// Nothing is selected for a module missing from the index, nor for a module
/// that has no callback signatures.
#[test]
fn endpoint_handler_callback_selection_is_strict_when_metadata_missing() {
    let snapshot_without_contracts = ErlangModuleSnapshot {
        implemented_signatures: HashSet::from([("init".to_string(), 2)]),
        callback_signatures: HashSet::new(),
    };
    let mut idx = HashMap::new();
    idx.insert("router_only".to_string(), snapshot_without_contracts);

    let for_unknown_module = select_endpoint_handler_fqns("missing_module", &idx);
    assert!(for_unknown_module.is_empty());

    let for_module_without_contracts = select_endpoint_handler_fqns("router_only", &idx);
    assert!(for_module_without_contracts.is_empty());
}
/// Local calls must produce caller-to-callee edges, while the remote call
/// `lists:map/2` must not be recorded as a local `my_mod:map/2` edge.
#[test]
fn extracts_precise_erlang_call_edges_from_ast() {
    let source = r#"
-module(my_mod).
a() -> b(), c(), ok.
b() -> ok.
c() -> lists:map(fun(X) -> X end, [1,2]).
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let functions = extract_erlang_functions("my_mod", &tree, source);

    // Look-up table from (name, arity) to the function's FQN.
    let mut function_by_sig: HashMap<(String, u32), String> = HashMap::new();
    for (name, arity, fqn) in functions.iter() {
        function_by_sig.insert((name.clone(), *arity), fqn.clone());
    }

    let edges = extract_erlang_call_edges(&tree, source, Some("my_mod"), &function_by_sig);
    let edge = |from: &str, to: &str| (from.to_string(), to.to_string());
    assert!(edges.contains(&edge("my_mod:a/0", "my_mod:b/0")));
    assert!(edges.contains(&edge("my_mod:a/0", "my_mod:c/0")));
    assert!(!edges.contains(&edge("my_mod:c/0", "my_mod:map/2")));
}
/// With four functions and a single real call, exactly one edge must come
/// out — no pairing of every function with every other.
#[test]
fn does_not_create_nm_edges_for_sparse_local_calls() {
    let source = r#"
-module(my_mod).
a() -> b().
b() -> ok.
c() -> ok.
d() -> ok.
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let functions = extract_erlang_functions("my_mod", &tree, source);

    // Look-up table from (name, arity) to the function's FQN.
    let mut function_by_sig: HashMap<(String, u32), String> = HashMap::new();
    for (name, arity, fqn) in functions.iter() {
        function_by_sig.insert((name.clone(), *arity), fqn.clone());
    }

    let edges = extract_erlang_call_edges(&tree, source, Some("my_mod"), &function_by_sig);
    let only_edge = ("my_mod:a/0".to_string(), "my_mod:b/0".to_string());
    let expected = HashSet::from([only_edge]);
    assert_eq!(edges, expected);
}
/// Calls made in different clauses of `foo/1` must all be attributed to the
/// single function `my_mod:foo/1`.
#[test]
fn attributes_calls_to_enclosing_multi_clause_function() {
    let source = r#"
-module(my_mod).
foo(0) -> bar();
foo(N) -> baz(N).
bar() -> ok.
baz(_N) -> ok.
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let functions = extract_erlang_functions("my_mod", &tree, source);

    // Look-up table from (name, arity) to the function's FQN.
    let mut function_by_sig: HashMap<(String, u32), String> = HashMap::new();
    for (name, arity, fqn) in functions.iter() {
        function_by_sig.insert((name.clone(), *arity), fqn.clone());
    }

    let edges = extract_erlang_call_edges(&tree, source, Some("my_mod"), &function_by_sig);
    let edge = |from: &str, to: &str| (from.to_string(), to.to_string());
    assert!(edges.contains(&edge("my_mod:foo/1", "my_mod:bar/0")));
    assert!(edges.contains(&edge("my_mod:foo/1", "my_mod:baz/1")));
    assert_eq!(edges.len(), 2);
}
/// Remote calls of the form `module:function(...)` must contribute their
/// module names to the called-module set.
#[test]
fn extracts_called_modules_from_ast_remote_calls() {
    let source = r#"
-module(my_mod).
a() -> lists:map(fun(X) -> X end, [1,2]), my_dep:run().
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let called_modules = extract_erlang_called_modules_from_tree(&tree, source);
    assert!(called_modules.contains("lists"));
    assert!(called_modules.contains("my_dep"));
}
/// Cowboy route tuples must yield `(path, handler)` pairs whether written on
/// one line or spread over several.
#[test]
fn extracts_cowboy_endpoints_from_ast_multiline_tuples() {
    let source = r#"
Dispatch = cowboy_router:compile([
{'_', [
{"/v1/ping", ping_handler, []},
{
"/v1/ws",
websocket_handler,
[]
}
]}
]).
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let endpoints = extract_erlang_api_endpoints(&tree, source);

    // Drop the first tuple element; only path and handler are compared.
    let mut routes: HashSet<(String, String)> = HashSet::new();
    for (_, path, handler) in endpoints {
        routes.insert((path, handler));
    }
    assert!(routes.contains(&("/v1/ping".to_string(), "ping_handler".to_string())));
    assert!(routes.contains(&("/v1/ws".to_string(), "websocket_handler".to_string())));
}
/// The URL in a string literal must be returned with its query string intact;
/// the URL that only appears in a `%` comment must not.
#[test]
fn extracts_external_urls_from_ast_strings_only() {
    let source = r#"
-module(my_mod).
a() ->
Url = "https://api.example.com/v1/orders?x=1",
io:format("~p", [Url]).
% "https://comment.only/should/not/appear"
"#;
    let tree = parse_once(LanguageId::Erlang, source).expect("Erlang parse should succeed");
    let found_urls = extract_external_http_urls_from_tree(&tree, source);
    let literal_url = "https://api.example.com/v1/orders?x=1".to_string();
    assert!(found_urls.contains(&literal_url));
    assert!(found_urls.iter().all(|url| !url.contains("comment.only")));
}
/// Malformed Java must yield at least one parse warning, with at least one
/// of them reported at line 2 or later.
#[test]
fn java_parse_warnings_surface_error_nodes() {
    let src = r#"
package p;
class Broken { void m( // missing close paren and brace
"#;
    let tree = parse_once(LanguageId::Java, src).expect("parse");
    let w = extract_java_parse_warnings(&tree, src);
    assert!(
        !w.is_empty(),
        "expected ERROR/missing warnings, got {w:?}"
    );
    let any_at_or_after_line_two = w.iter().any(|(line, _, _)| *line >= 2);
    assert!(any_at_or_after_line_two, "got {w:?}");
}
/// For `getOrders`, the generic return type is expected as `ResponseEntity`,
/// and the parameter types and count must match the declaration.
#[test]
fn java_method_extracts_return_and_param_types() {
    let src = r#"
package com.example;
import java.util.List;
import org.springframework.http.ResponseEntity;
class Order {}
class C {
public ResponseEntity<List<Order>> getOrders(String id, int page) { return null; }
}
"#;
    let tree = parse_once(LanguageId::Java, src).expect("parse");
    let (_, methods) = extract_java_symbols(&tree, src, Some("com.example"));
    let get_orders = methods
        .iter()
        .find(|method| method.name == "getOrders")
        .expect("getOrders");
    assert_eq!(get_orders.return_type.as_deref(), Some("ResponseEntity"));
    assert_eq!(get_orders.param_types, vec!["String", "int"]);
    assert_eq!(get_orders.param_count, 2);
}
/// Both the `extends` and the `implements` clause must produce edges; the
/// same-package parent is qualified with the package name.
#[test]
fn java_inheritance_edges_extends_and_implements() {
    let src = r#"
package p;
class Parent {}
class Child extends Parent implements java.io.Serializable {}
"#;
    let tree = parse_once(LanguageId::Java, src).expect("parse");
    let edges = extract_java_inheritance_edges(&tree, src, Some("p"));

    let extends_edge = ("p.Child".to_string(), "p.Parent".to_string());
    assert!(edges.contains(&extends_edge));

    let implements_edge = ("p.Child".to_string(), "java.io.Serializable".to_string());
    assert!(edges.contains(&implements_edge));
}
/// Class-level and method-level annotations must be extracted by simple
/// name, without the leading `@`.
#[test]
fn java_class_and_method_annotations_extracted() {
    let src = r#"
package p;
@Service
class Svc {
@Override
@Deprecated
void run() {}
}
"#;
    let tree = parse_once(LanguageId::Java, src).expect("parse");

    let class_annotations = extract_java_class_annotations(&tree, src, Some("p"));
    let svc_has_service = class_annotations
        .iter()
        .any(|(fqn, names)| fqn == "p.Svc" && names.contains(&"Service".to_string()));
    assert!(svc_has_service);

    let method_annotations = extract_java_method_annotations(&tree, src, Some("p"));
    let (_, run_annotations) = method_annotations
        .iter()
        .find(|(fqn, _)| fqn.ends_with(".run"))
        .expect("run");
    assert!(run_annotations.contains(&"Override".to_string()));
    assert!(run_annotations.contains(&"Deprecated".to_string()));
}
/// Dependencies injected through an `@Autowired` field and through
/// constructor parameters must both be reported for the owning class.
#[test]
fn java_injected_dependencies_constructor_and_autowired_field() {
    let src = r#"
package p;
class OrderRepo {}
class UserService {}
class MyService {
@Autowired
OrderRepo repo;
public MyService(UserService svc, OrderRepo r2) {}
}
"#;
    let tree = parse_once(LanguageId::Java, src).expect("parse");
    let dependencies = extract_java_injected_dependencies(&tree, src, Some("p"));
    let injected = |owner: &str, dependency: &str| (owner.to_string(), dependency.to_string());
    assert!(dependencies.contains(&injected("p.MyService", "p.OrderRepo")));
    assert!(dependencies.contains(&injected("p.MyService", "p.UserService")));
}
/// A truncated Go function body must produce at least one parse warning.
#[test]
fn go_parse_warnings_surface_error_nodes() {
    let src = r#"package main
func main() { x :=
"#;
    let tree = parse_once(LanguageId::Go, src).expect("parse");
    let w = extract_go_parse_warnings(&tree, src);
    let produced_warnings = !w.is_empty();
    assert!(produced_warnings, "expected warnings, got {w:?}");
}
/// Go type declarations must be tagged as `interface` or `struct`, and an
/// embedded `io.Reader` must be reported as an embedding of `main.MyStruct`.
#[test]
fn go_extracts_interface_kind_and_struct_embedding() {
    let src = r#"package main
import "io"
type Reader interface { Read(p []byte) (n int, err error) }
type MyStruct struct {
io.Reader
Name string
}
"#;
    let tree = parse_once(LanguageId::Go, src).expect("parse");
    let (classes, _) = extract_go_symbols(&tree, src, Some("main"));

    let reader_is_interface = classes
        .iter()
        .any(|class| class.name == "Reader" && class.kind == Some("interface"));
    assert!(reader_is_interface);

    let my_struct_is_struct = classes
        .iter()
        .any(|class| class.name == "MyStruct" && class.kind == Some("struct"));
    assert!(my_struct_is_struct);

    let embeddings = extract_go_embedding(&tree, src, Some("main"));
    let embedded_reader = ("main.MyStruct".to_string(), "io.Reader".to_string());
    assert!(embeddings.contains(&embedded_reader));
}
/// `go worker()` inside `main` must be reported as the pair
/// (`main.main`, `main.worker`).
#[test]
fn go_goroutine_call_extracted_as_calls_function_pair() {
    let src = r#"package main
func worker() {}
func main() { go worker() }
"#;
    let tree = parse_once(LanguageId::Go, src).expect("parse");
    let g = extract_go_goroutine_calls(&tree, src, Some("main"));
    let spawned_worker = ("main.main".to_string(), "main.worker".to_string());
    assert!(g.contains(&spawned_worker), "got {g:?}");
}
/// Methods on `*User` must be flagged as pointer-receiver; methods on `User`
/// must be flagged as value-receiver.
#[test]
fn go_method_pointer_receiver_flag() {
    let src = r#"package main
type User struct{}
func (u *User) GetName() string { return "" }
func (u User) String() string { return "" }
"#;
    let tree = parse_once(LanguageId::Go, src).expect("parse");
    let (_, funcs) = extract_go_symbols(&tree, src, Some("main"));

    let pointer_method = funcs
        .iter()
        .find(|func| func.name == "GetName")
        .expect("GetName");
    assert_eq!(pointer_method.is_pointer_receiver, Some(true));

    let value_method = funcs
        .iter()
        .find(|func| func.name == "String")
        .expect("String");
    assert_eq!(value_method.is_pointer_receiver, Some(false));
}
/// Every path inside a parenthesised `import (...)` group must be extracted.
#[test]
fn go_import_paths_extracted_from_grouped_import() {
    let src = r#"package main
import (
"fmt"
"github.com/gorilla/mux"
"myproject/internal/handler"
)
func main() {}
"#;
    let tree = parse_once(LanguageId::Go, src).expect("parse");
    let import_paths = extract_go_imports(&tree, src);
    assert!(import_paths.contains(&"fmt".to_string()));
    assert!(import_paths.contains(&"github.com/gorilla/mux".to_string()));
    assert!(import_paths.contains(&"myproject/internal/handler".to_string()));
}
/// An import path must resolve to a known file in the matching directory
/// even when no module or replace information is supplied.
#[test]
fn go_import_resolves_to_known_scanned_file_path() {
    let handler_file = "/repo/myproject/internal/handler/api.go";
    let known = HashSet::from([handler_file.to_string()]);
    let resolved =
        resolve_go_import_to_known_go_file("myproject/internal/handler", &known, &[], &[], None);
    assert_eq!(
        resolved.as_deref(),
        Some("/repo/myproject/internal/handler/api.go")
    );
}
/// A free Rust function must get an FQN of the form `<file path>::<name>`.
#[test]
fn rust_graph_symbols_use_file_scoped_fqn() {
    let src = "fn hello() {}";
    let file = ParsedFile {
        path: PathBuf::from("/x/a.rs"),
        language: LanguageId::Rust,
        tree: parse_once(LanguageId::Rust, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let symbols = extract_non_java_function_symbols(&file, src, "/x/a.rs");
    assert_eq!(symbols.len(), 1);
    assert_eq!(symbols[0].fqn, "/x/a.rs::hello");
}
/// A call between two functions in the same Rust file must show up as a
/// `CallsFunction` edge keyed by the file-scoped FQNs.
#[test]
fn rust_ir_emits_intrafile_calls() {
    let src = r#"
fn callee() {}
fn caller() {
callee();
}
"#;
    let only_file = ParsedFile {
        path: PathBuf::from("src/a.rs"),
        language: LanguageId::Rust,
        tree: parse_once(LanguageId::Rust, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let files = vec![only_file];
    let ir = build_project_ir(Path::new("/repo"), &files, &ExtractOptions::default());
    let has_call = ir
        .edges
        .iter()
        .filter(|edge| edge.kind == EdgeKind::CallsFunction)
        .any(|edge| edge.from_key == "src/a.rs::caller" && edge.to_key == "src/a.rs::callee");
    assert!(has_call, "expected intra-file CALLS_FUNCTION");
}
/// `use crate::other;` must produce a `DependsOnFile` edge from the
/// importing file to `src/other.rs`.
#[test]
fn rust_ir_emits_depends_on_file_from_use() {
    let caller_src = "use crate::other;\nfn caller() {}";
    let callee_src = "pub fn callee() {}";
    let importing_file = ParsedFile {
        path: PathBuf::from("src/caller.rs"),
        language: LanguageId::Rust,
        tree: parse_once(LanguageId::Rust, caller_src).expect("parse"),
        source: caller_src.to_string(),
        is_test: false,
    };
    let imported_file = ParsedFile {
        path: PathBuf::from("src/other.rs"),
        language: LanguageId::Rust,
        tree: parse_once(LanguageId::Rust, callee_src).expect("parse"),
        source: callee_src.to_string(),
        is_test: false,
    };
    let files = vec![importing_file, imported_file];
    let ir = build_project_ir(Path::new("/repo"), &files, &ExtractOptions::default());
    let has_dep = ir
        .edges
        .iter()
        .filter(|edge| edge.kind == EdgeKind::DependsOnFile)
        .any(|edge| edge.from_key == "src/caller.rs" && edge.to_key == "src/other.rs");
    assert!(has_dep, "expected DEPENDS_ON_FILE from use crate::other");
}
/// Building the IR for a Go file that contains function calls must emit at
/// least one `CallsFunction` edge.
#[test]
fn go_ir_emits_calls_function_edges() {
    let src = r#"
package main
import "fmt"
func helper() { fmt.Println("x") }
func main() { helper() }
"#;
    let only_file = ParsedFile {
        path: PathBuf::from("main.go"),
        language: LanguageId::Go,
        tree: parse_once(LanguageId::Go, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let files = vec![only_file];
    let ir = build_project_ir(Path::new("/repo"), &files, &ExtractOptions::default());
    let has_call = ir
        .edges
        .iter()
        .any(|edge| edge.kind == EdgeKind::CallsFunction);
    assert!(has_call, "expected at least one CALLS_FUNCTION edge for Go");
}
/// Python functions must get file-scoped FQNs, with a nested function named
/// `outer.inner` under its parent.
#[test]
fn python_graph_symbols_file_scoped_fqn_and_nested() {
    let src = r#"
def top():
pass
def outer():
def inner():
pass
pass
"#;
    let file = ParsedFile {
        path: PathBuf::from("/app/mod.py"),
        language: LanguageId::Python,
        tree: parse_once(LanguageId::Python, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let symbols = extract_non_java_function_symbols(&file, src, "/app/mod.py");
    let mut fqns: Vec<&str> = Vec::new();
    for symbol in symbols.iter() {
        fqns.push(symbol.fqn.as_str());
    }
    assert!(fqns.contains(&"/app/mod.py::top"));
    assert!(fqns.contains(&"/app/mod.py::outer"));
    assert!(fqns.contains(&"/app/mod.py::outer.inner"));
}
/// The extracted function symbols must include `global_fn` but not the
/// class method `meth`.
#[test]
fn python_class_methods_excluded_from_graph_symbols() {
    let src = r#"
class C:
def meth(self):
pass
def global_fn():
pass
"#;
    let file = ParsedFile {
        path: PathBuf::from("/app/c.py"),
        language: LanguageId::Python,
        tree: parse_once(LanguageId::Python, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let symbols = extract_non_java_function_symbols(&file, src, "/app/c.py");
    assert!(symbols.iter().all(|symbol| symbol.name != "meth"));
    assert!(symbols.iter().any(|symbol| symbol.name == "global_fn"));
}
/// JavaScript symbols must cover class methods, arrow-function class fields,
/// top-level arrow constants and function declarations.
#[test]
fn js_graph_symbols_class_method_arrow_and_top_level() {
    let src = r#"
class Box {
run() { return 1; }
go = () => 2;
}
const top = () => {};
function decl() {}
"#;
    let file = ParsedFile {
        path: PathBuf::from("/app/box.js"),
        language: LanguageId::JavaScript,
        tree: parse_once(LanguageId::JavaScript, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let symbols = extract_non_java_function_symbols(&file, src, "/app/box.js");
    let fqns: Vec<&str> = symbols.iter().map(|symbol| symbol.fqn.as_str()).collect();

    // Only suffixes are checked, so the file-path prefix does not matter here.
    assert!(
        fqns.iter().any(|fqn| fqn.ends_with("::Box.run")),
        "got {fqns:?}"
    );
    assert!(
        fqns.iter().any(|fqn| fqn.ends_with("::Box.go")),
        "got {fqns:?}"
    );
    assert!(
        fqns.iter().any(|fqn| fqn.ends_with("::top")),
        "got {fqns:?}"
    );
    assert!(
        fqns.iter().any(|fqn| fqn.ends_with("::decl")),
        "got {fqns:?}"
    );
}
/// A TypeScript class method must be emitted with an FQN ending in
/// `::Svc.handle`.
#[test]
fn ts_graph_symbols_include_class_method() {
    let src = r#"
class Svc {
handle(): void {}
}
"#;
    let file = ParsedFile {
        path: PathBuf::from("/svc/h.ts"),
        language: LanguageId::TypeScript,
        tree: parse_once(LanguageId::TypeScript, src).expect("parse"),
        source: src.to_string(),
        is_test: false,
    };
    let syms = extract_non_java_function_symbols(&file, src, "/svc/h.ts");
    let has_method = syms.iter().any(|sym| sym.fqn.ends_with("::Svc.handle"));
    assert!(
        has_method,
        "got {:?}",
        syms.iter().map(|sym| &sym.fqn).collect::<Vec<_>>()
    );
}
/// An unterminated Python `def` must produce at least one parse warning.
#[test]
fn python_parse_warnings_surface_error_nodes() {
    let src = "def foo(\n";
    let tree = parse_once(LanguageId::Python, src).expect("parse");
    let w = extract_python_parse_warnings(&tree, src);
    let produced_warnings = !w.is_empty();
    assert!(produced_warnings, "expected warnings, got {w:?}");
}
/// A malformed JavaScript function header must produce at least one parse warning.
#[test]
fn js_parse_warnings_surface_error_nodes() {
    let src = "function f( {";
    let tree = parse_once(LanguageId::JavaScript, src).expect("parse");
    let w = extract_js_ts_parse_warnings(&tree, src);
    let produced_warnings = !w.is_empty();
    assert!(produced_warnings, "expected warnings, got {w:?}");
}
/// The dotted import `pkg.helper` must resolve to the known file whose path
/// ends in `pkg/helper.py`.
#[test]
fn python_import_resolves_to_known_py_file() {
    let known = HashSet::from([String::from("/repo/pkg/helper.py")]);
    let resolved = resolve_python_import_to_known_file("pkg.helper", &known);
    assert_eq!(resolved.as_deref(), Some("/repo/pkg/helper.py"));
}
/// A call between two functions in the same Python file must be reported as
/// a (caller FQN, callee FQN) pair.
#[test]
fn python_intrafile_call_edge() {
    let src = r#"
def callee():
pass
def caller():
callee()
"#;
    let tree = parse_once(LanguageId::Python, src).expect("parse");
    let fp = "/t/a.py";

    // Map simple names to the file-scoped FQNs the extractor should emit.
    let mut name_to_fqn = HashMap::new();
    name_to_fqn.insert("callee".into(), format!("{fp}::callee"));
    name_to_fqn.insert("caller".into(), format!("{fp}::caller"));

    let calls = extract_python_intrafile_calls(&tree, src, fp, &name_to_fqn);
    let expected_edge = (format!("{fp}::caller"), format!("{fp}::callee"));
    assert!(calls.contains(&expected_edge), "got {calls:?}");
}
/// The relative specifier `./util`, imported from `/repo/src/main.ts`, must
/// resolve to the known sibling `/repo/src/util.ts`.
#[test]
fn js_ts_relative_import_resolves_to_known_file() {
    let known = HashSet::from([String::from("/repo/src/util.ts")]);
    let resolved = resolve_js_ts_import_to_known_file("./util", "/repo/src/main.ts", &known);
    assert_eq!(resolved.as_deref(), Some("/repo/src/util.ts"));
}
/// A call between two functions in the same JavaScript file must be reported
/// as a (caller FQN, callee FQN) pair.
#[test]
fn js_intrafile_call_edge() {
    let src = r#"
function callee() {}
function caller() { callee(); }
"#;
    let tree = parse_once(LanguageId::JavaScript, src).expect("parse");
    let fp = "/t/b.js";

    // Map simple names to the file-scoped FQNs the extractor should emit.
    let mut name_to_fqn = HashMap::new();
    name_to_fqn.insert("callee".into(), format!("{fp}::callee"));
    name_to_fqn.insert("caller".into(), format!("{fp}::caller"));

    let calls =
        extract_js_ts_intrafile_calls(&tree, src, fp, LanguageId::JavaScript, &name_to_fqn);
    let expected_edge = (format!("{fp}::caller"), format!("{fp}::callee"));
    assert!(calls.contains(&expected_edge), "got {calls:?}");
}
}