use std::collections::HashMap;
use tree_sitter::{Node, Parser, Tree};
use crate::ast::types::{ClassInfo, FieldInfo, FunctionInfo, ImportInfo};
use crate::cfg::types::{BlockId, BlockType, CFGBlock, CFGEdge, CFGInfo};
use crate::dfg::types::{DFGInfo, DataflowEdge, DataflowKind};
use crate::error::{Result, BrrrError};
use crate::lang::traits::Language;
/// Unit struct that the Java extraction and CFG-building helpers in this file
/// are implemented on (see `impl Java`).
pub struct Java;
/// Javadoc block tags that receive dedicated handling when a comment is parsed.
#[derive(Clone, Copy, PartialEq, Eq)]
enum JavadocTag {
    /// `@param`
    Param,
    /// `@return`
    Return,
    /// `@throws` or `@exception`
    Throws,
    /// Any input that is not one of the recognised tags.
    Other,
}

impl JavadocTag {
    /// Classifies the tag keyword at the very start of `bytes`.
    ///
    /// Returns the tag and the byte length of the matched keyword (leading `@`
    /// included). A keyword only counts when it is followed by the end of the
    /// input or by a byte that is not an ASCII letter, so `@params` is not
    /// `@param`. Anything unrecognised yields `(JavadocTag::Other, 0)`.
    #[inline]
    fn detect(bytes: &[u8]) -> (Self, usize) {
        const KEYWORDS: [(&str, JavadocTag); 4] = [
            ("@param", JavadocTag::Param),
            ("@return", JavadocTag::Return),
            ("@throws", JavadocTag::Throws),
            ("@exception", JavadocTag::Throws),
        ];

        for &(keyword, tag) in KEYWORDS.iter() {
            if !bytes.starts_with(keyword.as_bytes()) {
                continue;
            }
            // The keyword must end at a word boundary.
            let at_boundary = match bytes.get(keyword.len()) {
                Some(next) => !next.is_ascii_alphabetic(),
                None => true,
            };
            if at_boundary {
                return (tag, keyword.len());
            }
        }
        (Self::Other, 0)
    }
}
/// Everything `Java::extract_all_modifiers` collects for one declaration.
///
/// Each flag is `true` when the keyword of the same name appears in the
/// declaration's `modifiers` node; the default is "nothing set".
#[derive(Default)]
struct ExtractedModifiers {
// `Some("public" | "private" | "protected")`, or `None` when no access keyword is present.
visibility: Option<String>,
is_static: bool,
is_final: bool,
is_abstract: bool,
is_synchronized: bool,
is_default: bool,
is_native: bool,
is_volatile: bool,
is_transient: bool,
is_strictfp: bool,
is_sealed: bool,
// Set by the `non-sealed` keyword.
is_non_sealed: bool,
// Type names returned by `extract_permits_clause` for the declaration.
permits: Vec<String>,
// Source text of each `marker_annotation` / `annotation` modifier, in source order.
annotations: Vec<String>,
}
impl Java {
/// Returns the source text spanned by `node`, or `""` when `utf8_text` fails.
fn node_text<'a>(&self, node: Node<'a>, source: &'a [u8]) -> &'a str {
    match node.utf8_text(source) {
        Ok(text) => text,
        Err(_) => "",
    }
}
/// Thin wrapper around `Node::child_by_field_name`: the child stored under
/// `field`, if any.
fn child_by_field<'a>(&self, node: Node<'a>, field: &str) -> Option<Node<'a>> {
    let found = node.child_by_field_name(field);
    found
}
fn extract_modifiers(
&self,
node: Node,
source: &[u8],
) -> (Option<String>, bool, bool, bool, bool, bool, Vec<String>) {
let mods = self.extract_all_modifiers(node, source);
(
mods.visibility,
mods.is_static,
mods.is_final,
mods.is_abstract,
mods.is_synchronized,
mods.is_default,
mods.annotations,
)
}
/// Collects every modifier keyword, annotation and `permits` entry of a declaration.
///
/// Only the first direct `modifiers` child of `node` is inspected; when there
/// is none, all flags keep their defaults. The `permits` list is read from
/// `node` itself via `extract_permits_clause`.
fn extract_all_modifiers(&self, node: Node, source: &[u8]) -> ExtractedModifiers {
    let mut found = ExtractedModifiers::default();

    let modifier_list = node
        .children(&mut node.walk())
        .find(|candidate| candidate.kind() == "modifiers");

    if let Some(list) = modifier_list {
        for entry in list.children(&mut list.walk()) {
            let kind = entry.kind();
            match kind {
                // The node kind of an access keyword is the keyword itself.
                "public" | "private" | "protected" => {
                    found.visibility = Some(kind.to_string());
                }
                "static" => found.is_static = true,
                "final" => found.is_final = true,
                "abstract" => found.is_abstract = true,
                "synchronized" => found.is_synchronized = true,
                "default" => found.is_default = true,
                "native" => found.is_native = true,
                "volatile" => found.is_volatile = true,
                "transient" => found.is_transient = true,
                "strictfp" => found.is_strictfp = true,
                "sealed" => found.is_sealed = true,
                "non-sealed" => found.is_non_sealed = true,
                "marker_annotation" | "annotation" => {
                    let annotation = self.extract_annotation(entry, source);
                    found.annotations.push(annotation);
                }
                _ => {}
            }
        }
    }

    found.permits = self.extract_permits_clause(node, source);
    found
}
/// Lists the type names found in the `permits` / `permits_clause` children of `node`.
///
/// Names are accepted either directly under the clause or one level down
/// inside a `type_list`; only plain and scoped type identifiers are collected.
fn extract_permits_clause(&self, node: Node, source: &[u8]) -> Vec<String> {
    fn is_type_name(kind: &str) -> bool {
        matches!(kind, "type_identifier" | "scoped_type_identifier")
    }

    let mut permitted = Vec::new();
    for clause in node.children(&mut node.walk()) {
        if !matches!(clause.kind(), "permits" | "permits_clause") {
            continue;
        }
        for entry in clause.children(&mut clause.walk()) {
            if is_type_name(entry.kind()) {
                permitted.push(self.extract_type(entry, source));
            } else if entry.kind() == "type_list" {
                permitted.extend(
                    entry
                        .children(&mut entry.walk())
                        .filter(|listed| is_type_name(listed.kind()))
                        .map(|listed| self.extract_type(listed, source)),
                );
            }
        }
    }
    permitted
}
/// Returns the annotation exactly as written in the source.
fn extract_annotation(&self, node: Node, source: &[u8]) -> String {
    String::from(self.node_text(node, source))
}
/// Renders a type node as a string.
///
/// Generic, array, scoped and wildcard types go to their dedicated helpers,
/// `void_type` becomes `"void"`, and every other kind is returned as its
/// source text.
fn extract_type(&self, node: Node, source: &[u8]) -> String {
    match node.kind() {
        "void_type" => String::from("void"),
        "generic_type" => self.extract_generic_type(node, source),
        "array_type" => self.extract_array_type(node, source),
        "scoped_type_identifier" => self.extract_scoped_type(node, source),
        "wildcard" => self.extract_wildcard_type(node, source),
        // Identifiers (including `var`), primitives and anything unknown: verbatim text.
        _ => self.node_text(node, source).to_owned(),
    }
}
/// Renders a wildcard as `?`, `? extends T` or `? super T`.
fn extract_wildcard_type(&self, node: Node, source: &[u8]) -> String {
    let mut rendered = String::from("?");
    for part in node.children(&mut node.walk()) {
        let kind = part.kind();
        if kind == "extends" || kind == "super" {
            // The keyword node's kind is the keyword text.
            rendered.push(' ');
            rendered.push_str(kind);
            rendered.push(' ');
        } else if matches!(
            kind,
            "type_identifier" | "generic_type" | "scoped_type_identifier" | "array_type"
        ) {
            rendered += &self.extract_type(part, source);
        }
    }
    rendered
}
/// Renders a generic type as its base name followed by the formatted type arguments.
fn extract_generic_type(&self, node: Node, source: &[u8]) -> String {
    let mut rendered = String::new();
    for part in node.children(&mut node.walk()) {
        let kind = part.kind();
        if kind == "type_arguments" {
            rendered += &self.extract_type_arguments(part, source);
        } else if kind == "type_identifier" || kind == "scoped_type_identifier" {
            rendered += self.node_text(part, source);
        }
    }
    rendered
}
/// Formats a `type_arguments` node as `<A, B, ...>`, or `<>` for the diamond form.
///
/// Array arguments (`List<String[]>`) and annotated arguments are included so
/// the rendered arity matches the source; each argument is rendered through
/// `extract_type`.
fn extract_type_arguments(&self, node: Node, source: &[u8]) -> String {
    let args: Vec<String> = node
        .children(&mut node.walk())
        .filter(|child| {
            matches!(
                child.kind(),
                "type_identifier"
                    | "generic_type"
                    | "wildcard"
                    | "scoped_type_identifier"
                    | "array_type"
                    | "annotated_type"
            )
        })
        .map(|child| self.extract_type(child, source))
        .collect();

    if args.is_empty() {
        "<>".to_string()
    } else {
        format!("<{}>", args.join(", "))
    }
}
/// Renders an `array_type` node as its element type followed by one `[]` per dimension.
///
/// Qualified element types (`Map.Entry[]`) are handled alongside plain,
/// primitive and generic ones. At least one `[]` is always emitted, even if
/// no `dimensions` child is found.
fn extract_array_type(&self, node: Node, source: &[u8]) -> String {
    let mut base_type = String::new();
    let mut dimensions = 0;
    for child in node.children(&mut node.walk()) {
        match child.kind() {
            "type_identifier"
            | "scoped_type_identifier"
            | "integral_type"
            | "floating_point_type"
            | "boolean_type"
            | "generic_type" => {
                base_type = self.extract_type(child, source);
            }
            "dimensions" => {
                dimensions = self.count_array_dimensions(child, source);
            }
            _ => {}
        }
    }
    format!("{}{}", base_type, "[]".repeat(dimensions.max(1)))
}
/// Counts the bracket pairs in a `dimensions` node; never returns less than 1.
///
/// `[` child tokens are counted first; if there are none, the `[` characters
/// in the node's text are counted instead.
fn count_array_dimensions(&self, dimensions_node: Node, source: &[u8]) -> usize {
    let token_count = dimensions_node
        .children(&mut dimensions_node.walk())
        .filter(|token| token.kind() == "[")
        .count();

    let brackets = if token_count > 0 {
        token_count
    } else {
        self.node_text(dimensions_node, source).matches('[').count()
    };
    std::cmp::max(brackets, 1)
}
/// Returns a qualified type name exactly as written in the source.
fn extract_scoped_type(&self, node: Node, source: &[u8]) -> String {
    String::from(self.node_text(node, source))
}
/// Renders each `formal_parameter` / `spread_parameter` child of a parameter list.
///
/// Every other child, including `receiver_parameter`, is left out.
fn extract_parameters(&self, node: Node, source: &[u8]) -> Vec<String> {
    let rendered: Vec<String> = node
        .children(&mut node.walk())
        .filter(|param| matches!(param.kind(), "formal_parameter" | "spread_parameter"))
        .map(|param| self.extract_single_parameter(param, source))
        .collect();
    rendered
}
/// Renders one parameter as `Type name`, or `Type... name` for varargs.
///
/// Handles both parameter shapes:
/// * the name appearing as a direct `identifier` child, and
/// * the name wrapped in a `variable_declarator` child (the shape
///   tree-sitter-java uses for `spread_parameter`).
///
/// C-style trailing dimensions (`int a[][]`) append one `[]` per dimension to
/// the type. Modifiers are ignored. When no name is found, only the type
/// (plus `...` for varargs) is returned, without a trailing space.
fn extract_single_parameter(&self, node: Node, source: &[u8]) -> String {
    let mut param_type = String::new();
    let mut param_name = String::new();
    let mut is_vararg = false;

    for child in node.children(&mut node.walk()) {
        match child.kind() {
            "type_identifier"
            | "integral_type"
            | "floating_point_type"
            | "boolean_type"
            | "generic_type"
            | "array_type"
            | "scoped_type_identifier"
            | "void_type" => {
                param_type = self.extract_type(child, source);
            }
            "identifier" => {
                param_name = self.node_text(child, source).to_string();
            }
            "variable_declarator" => {
                // Prefer the grammar's `name` field; fall back to the first identifier child.
                let mut name_node = self.child_by_field(child, "name");
                if name_node.is_none() {
                    name_node = child
                        .children(&mut child.walk())
                        .find(|part| part.kind() == "identifier");
                }
                if let Some(name_node) = name_node {
                    param_name = self.node_text(name_node, source).to_string();
                }
            }
            "..." => {
                is_vararg = true;
            }
            "dimensions" => {
                let dims = self.count_array_dimensions(child, source);
                param_type.push_str(&"[]".repeat(dims));
            }
            _ => {}
        }
    }

    match (is_vararg, param_name.is_empty()) {
        (true, true) => format!("{}...", param_type),
        (true, false) => format!("{}... {}", param_type, param_name),
        (false, true) => param_type,
        (false, false) => format!("{} {}", param_type, param_name),
    }
}
/// Renders the `type_parameters` field of a declaration as `<T, U extends X>`.
///
/// Returns `None` when the field is absent or holds no `type_parameter` children.
fn extract_type_parameters(&self, node: Node, source: &[u8]) -> Option<String> {
    let list = self.child_by_field(node, "type_parameters")?;
    let rendered: Vec<String> = list
        .children(&mut list.walk())
        .filter(|entry| entry.kind() == "type_parameter")
        .map(|entry| self.extract_type_parameter(entry, source))
        .collect();

    if rendered.is_empty() {
        return None;
    }
    Some(format!("<{}>", rendered.join(", ")))
}
/// Renders a single type parameter: its name, plus ` extends <bounds>` when bounded.
fn extract_type_parameter(&self, node: Node, source: &[u8]) -> String {
    let mut rendered = String::new();
    for part in node.children(&mut node.walk()) {
        let kind = part.kind();
        if kind == "type_identifier" {
            rendered += self.node_text(part, source);
        } else if kind == "type_bound" {
            rendered += " extends ";
            rendered += &self.extract_type_bound(part, source);
        }
    }
    rendered
}
/// Joins the types of a `type_bound` node with ` & `.
fn extract_type_bound(&self, node: Node, source: &[u8]) -> String {
    let bounds: Vec<String> = node
        .children(&mut node.walk())
        .filter(|bound| {
            matches!(
                bound.kind(),
                "type_identifier" | "generic_type" | "scoped_type_identifier"
            )
        })
        .map(|bound| self.extract_type(bound, source))
        .collect();
    bounds.join(" & ")
}
/// Finds the Javadoc comment attached to `node` and returns it in parsed form.
///
/// Walks backwards over the preceding siblings, skipping line comments,
/// modifiers and annotations. The first block comment reached decides the
/// result: `Some(parsed)` when it is a Javadoc comment (`/** ... */`), `None`
/// otherwise. Any other sibling kind ends the search with `None`.
///
/// The empty block comment `/**/` also starts with `/**` but is not Javadoc,
/// so it is rejected explicitly.
fn extract_javadoc(&self, node: Node, source: &[u8]) -> Option<String> {
    let mut prev = node.prev_sibling();
    while let Some(sibling) = prev {
        match sibling.kind() {
            "block_comment" => {
                let comment = self.node_text(sibling, source);
                if comment.starts_with("/**") && comment != "/**/" {
                    return Some(self.parse_javadoc(comment));
                }
                return None;
            }
            "line_comment" | "modifiers" | "marker_annotation" | "annotation" => {
                prev = sibling.prev_sibling();
            }
            _ => return None,
        }
    }
    None
}
/// Converts a raw `/** ... */` comment into a plain-text docstring.
///
/// The comment delimiters and each line's leading `*` decoration are removed
/// and blank lines are dropped. `@param`, `@return` and `@throws` /
/// `@exception` tags are gathered (continuation lines are appended to the tag
/// they follow, separated by a space) and emitted after the description as
/// `Parameters:`, `Returns:` and `Throws:` sections. Lines starting with any
/// other `@tag` stay in the description verbatim.
fn parse_javadoc(&self, comment: &str) -> String {
    let content = comment
        .trim_start_matches("/**")
        .trim_end_matches("*/")
        .trim();
    let lines = content
        .lines()
        .map(|line| line.trim().trim_start_matches('*').trim())
        .filter(|line| !line.is_empty());

    let mut description_lines: Vec<&str> = Vec::new();
    let mut params = Vec::new();
    let mut returns = Vec::new();
    let mut throws = Vec::new();
    let mut current_tag: Option<JavadocTag> = None;
    let mut current_value = String::new();

    for line in lines {
        if line.starts_with('@') {
            // A new tag begins: store whatever the previous tag accumulated.
            self.save_javadoc_tag(
                current_tag,
                &current_value,
                &mut params,
                &mut returns,
                &mut throws,
            );
            let (tag, offset) = JavadocTag::detect(line.as_bytes());
            match tag {
                JavadocTag::Param | JavadocTag::Return | JavadocTag::Throws => {
                    current_tag = Some(tag);
                    // `offset` is the length of an ASCII keyword, so it is a
                    // valid char boundary no greater than `line.len()`.
                    current_value = line[offset..].trim().to_string();
                }
                JavadocTag::Other => {
                    current_tag = None;
                    description_lines.push(line);
                }
            }
        } else if current_tag.is_some() {
            if !current_value.is_empty() {
                current_value.push(' ');
            }
            current_value.push_str(line);
        } else {
            description_lines.push(line);
        }
    }
    // Store the tag that was still open when the comment ended.
    self.save_javadoc_tag(
        current_tag,
        &current_value,
        &mut params,
        &mut returns,
        &mut throws,
    );

    let mut result = description_lines.join("\n");
    if !params.is_empty() {
        if !result.is_empty() {
            result.push_str("\n\n");
        }
        result.push_str("Parameters:\n");
        for param in params {
            result.push_str(&format!(" {}\n", param));
        }
    }
    if !returns.is_empty() {
        if !result.is_empty() && !result.ends_with('\n') {
            result.push('\n');
        }
        result.push_str("Returns: ");
        result.push_str(&returns.join(", "));
    }
    if !throws.is_empty() {
        if !result.is_empty() {
            result.push('\n');
        }
        result.push_str("Throws:\n");
        for throw in throws {
            result.push_str(&format!(" {}\n", throw));
        }
    }
    result.trim_end().to_string()
}
/// Appends `value` to the list that matches `tag`.
///
/// Nothing is stored when `value` is empty or when `tag` is `None` / `Other`.
#[inline]
fn save_javadoc_tag(
    &self,
    tag: Option<JavadocTag>,
    value: &str,
    params: &mut Vec<String>,
    returns: &mut Vec<String>,
    throws: &mut Vec<String>,
) {
    let bucket = match tag {
        Some(JavadocTag::Param) => params,
        Some(JavadocTag::Return) => returns,
        Some(JavadocTag::Throws) => throws,
        Some(JavadocTag::Other) | None => return,
    };
    if !value.is_empty() {
        bucket.push(value.to_owned());
    }
}
/// Returns the type named in the `superclass` field of a declaration, if any.
fn extract_superclass(&self, node: Node, source: &[u8]) -> Option<String> {
    let clause = self.child_by_field(node, "superclass")?;
    let parent = clause.children(&mut clause.walk()).find(|candidate| {
        matches!(
            candidate.kind(),
            "type_identifier" | "generic_type" | "scoped_type_identifier"
        )
    })?;
    Some(self.extract_type(parent, source))
}
/// Lists the types inside the `type_list` of a declaration's `interfaces` field.
///
/// Returns an empty vector when the field is absent.
fn extract_interfaces(&self, node: Node, source: &[u8]) -> Vec<String> {
    let mut names = Vec::new();
    if let Some(clause) = self.child_by_field(node, "interfaces") {
        for list in clause.children(&mut clause.walk()) {
            if list.kind() != "type_list" {
                continue;
            }
            names.extend(
                list.children(&mut list.walk())
                    .filter(|entry| {
                        matches!(
                            entry.kind(),
                            "type_identifier" | "generic_type" | "scoped_type_identifier"
                        )
                    })
                    .map(|entry| self.extract_type(entry, source)),
            );
        }
    }
    names
}
/// Lists the types found in the `type_list` of each `extends_interfaces`
/// child of `node`.
fn extract_extends_interfaces(&self, node: Node, source: &[u8]) -> Vec<String> {
    let mut names = Vec::new();
    for clause in node.children(&mut node.walk()) {
        if clause.kind() != "extends_interfaces" {
            continue;
        }
        for list in clause.children(&mut clause.walk()) {
            if list.kind() != "type_list" {
                continue;
            }
            names.extend(
                list.children(&mut list.walk())
                    .filter(|entry| {
                        matches!(
                            entry.kind(),
                            "type_identifier" | "generic_type" | "scoped_type_identifier"
                        )
                    })
                    .map(|entry| self.extract_type(entry, source)),
            );
        }
    }
    names
}
/// Collects the callable members found directly under a type body.
///
/// Covers methods, constructors, static initializers and bare `block`
/// children (reported as instance initializers), and recurses into
/// `enum_body_declarations`. Initializers are numbered from 1 in source
/// order, separately for each kind and separately for each call.
fn extract_methods(&self, body_node: Node, source: &[u8]) -> Vec<FunctionInfo> {
    let mut collected = Vec::new();
    let (mut static_blocks, mut instance_blocks) = (0, 0);

    for member in body_node.children(&mut body_node.walk()) {
        let kind = member.kind();
        if kind == "method_declaration" {
            collected.extend(self.extract_method(member, source));
        } else if kind == "constructor_declaration" {
            collected.extend(self.extract_constructor(member, source));
        } else if kind == "enum_body_declarations" {
            collected.extend(self.extract_methods(member, source));
        } else if kind == "static_initializer" {
            static_blocks += 1;
            collected.push(self.extract_static_initializer(member, source, static_blocks));
        } else if kind == "block" {
            instance_blocks += 1;
            collected.push(self.extract_instance_initializer(member, source, instance_blocks));
        }
    }
    collected
}
/// Collects the fields of every `field_declaration` found directly under `body_node`.
fn extract_fields(&self, body_node: Node, source: &[u8]) -> Vec<FieldInfo> {
    let fields: Vec<FieldInfo> = body_node
        .children(&mut body_node.walk())
        .filter(|member| member.kind() == "field_declaration")
        .flat_map(|member| self.extract_field(member, source))
        .collect();
    fields
}
/// Expands one `field_declaration` into a `FieldInfo` per declared variable.
///
/// Modifiers, annotations and the declared type are shared by every
/// declarator (`int a, b = 2;` yields two fields). For each declarator:
/// * the name is the identifier that appears *before* the `=` token, so an
///   identifier initializer (`int x = y;`) is never mistaken for the name;
/// * C-style dimensions after the name (`int a[]`) are appended to the type;
/// * the default value is the source text of the first named, non-comment
///   node after `=`, whatever its kind (literal, call, field access, ...).
///
/// `line_number` is the 1-based line where the whole declaration starts.
fn extract_field(&self, node: Node, source: &[u8]) -> Vec<FieldInfo> {
    let mut fields = Vec::new();
    let mut field_type: Option<String> = None;
    let mut visibility: Option<String> = None;
    let mut is_static = false;
    let mut is_final = false;
    let mut annotations = Vec::new();

    for child in node.children(&mut node.walk()) {
        match child.kind() {
            "modifiers" => {
                for mod_child in child.children(&mut child.walk()) {
                    match mod_child.kind() {
                        "public" => visibility = Some("public".to_string()),
                        "private" => visibility = Some("private".to_string()),
                        "protected" => visibility = Some("protected".to_string()),
                        "static" => is_static = true,
                        "final" => is_final = true,
                        "marker_annotation" | "annotation" => {
                            annotations.push(self.extract_annotation(mod_child, source));
                        }
                        _ => {}
                    }
                }
            }
            "type_identifier"
            | "integral_type"
            | "floating_point_type"
            | "boolean_type"
            | "generic_type"
            | "array_type"
            | "scoped_type_identifier"
            | "void_type" => {
                field_type = Some(self.extract_type(child, source));
            }
            "variable_declarator" => {
                let mut name = String::new();
                let mut default_value: Option<String> = None;
                let mut extra_dims = 0;
                // Everything before `=` declares the variable; what follows is its initializer.
                let mut seen_equals = false;

                for var_child in child.children(&mut child.walk()) {
                    let kind = var_child.kind();
                    if kind == "=" {
                        seen_equals = true;
                    } else if seen_equals {
                        let is_comment = matches!(kind, "line_comment" | "block_comment");
                        if default_value.is_none() && var_child.is_named() && !is_comment {
                            default_value = Some(self.node_text(var_child, source).to_string());
                        }
                    } else if kind == "identifier" {
                        name = self.node_text(var_child, source).to_string();
                    } else if kind == "dimensions" {
                        extra_dims = self.count_array_dimensions(var_child, source);
                    }
                }

                // `"[]".repeat(0)` is empty, so the type is unchanged without extra dimensions.
                let final_type = field_type
                    .as_ref()
                    .map(|base| format!("{}{}", base, "[]".repeat(extra_dims)));

                if !name.is_empty() {
                    fields.push(FieldInfo {
                        name,
                        field_type: final_type,
                        visibility: visibility.clone(),
                        is_static,
                        is_final,
                        default_value,
                        annotations: annotations.clone(),
                        line_number: node.start_position().row + 1,
                    });
                }
            }
            _ => {}
        }
    }
    fields
}
/// Describes a static initializer as a synthetic function named `<static_init_N>`.
///
/// `index` supplies `N`; any Javadoc found before the block becomes the docstring.
fn extract_static_initializer(
    &self,
    node: Node,
    source: &[u8],
    index: usize,
) -> FunctionInfo {
    let first_line = node.start_position().row + 1;
    let last_line = node.end_position().row + 1;
    FunctionInfo {
        name: format!("<static_init_{}>", index),
        params: Vec::new(),
        return_type: None,
        docstring: self.extract_javadoc(node, source),
        is_method: false,
        is_async: false,
        decorators: vec![String::from("static"), String::from("initializer")],
        line_number: first_line,
        end_line_number: Some(last_line),
        language: String::from("java"),
    }
}
/// Describes an instance initializer as a synthetic function named `<instance_init_N>`.
///
/// `index` supplies `N`. The source bytes are not consulted and no docstring
/// is attached.
fn extract_instance_initializer(
    &self,
    node: Node,
    _source: &[u8],
    index: usize,
) -> FunctionInfo {
    let first_line = node.start_position().row + 1;
    let last_line = node.end_position().row + 1;
    FunctionInfo {
        name: format!("<instance_init_{}>", index),
        params: Vec::new(),
        return_type: None,
        docstring: None,
        is_method: false,
        is_async: false,
        decorators: vec![String::from("initializer")],
        line_number: first_line,
        end_line_number: Some(last_line),
        language: String::from("java"),
    }
}
fn extract_method(&self, node: Node, source: &[u8]) -> Option<FunctionInfo> {
let name_node = self.child_by_field(node, "name")?;
let name = self.node_text(name_node, source).to_string();
let (
visibility,
is_static,
is_final,
is_abstract,
is_synchronized,
is_default,
annotations,
) = self.extract_modifiers(node, source);
let mut decorators = annotations.clone();
if let Some(vis) = &visibility {
decorators.insert(0, vis.clone());
}
if is_static {
decorators.push("static".to_string());
}
if is_final {
decorators.push("final".to_string());
}
if is_abstract {
decorators.push("abstract".to_string());
}
if is_synchronized {
decorators.push("synchronized".to_string());
}
if is_default {
decorators.push("default".to_string());
}
let type_params = self.extract_type_parameters(node, source);
if let Some(tp) = type_params {
decorators.push(format!("generic:{}", tp));
}
let throws = self.extract_throws_clause(node, source);
if !throws.is_empty() {
decorators.push(format!("throws:{}", throws.join(", ")));
}
let return_type = self
.child_by_field(node, "type")
.map(|n| self.extract_type(n, source));
let params_node = self.child_by_field(node, "parameters")?;
let params = self.extract_parameters(params_node, source);
let docstring = self.extract_javadoc(node, source);
let line_number = node.start_position().row + 1;
let end_line_number = Some(node.end_position().row + 1);
Some(FunctionInfo {
name,
params,
return_type,
docstring,
is_method: true,
is_async: false, decorators,
line_number,
end_line_number,
language: "java".to_string(),
})
}
/// Lists the exception types named in the `throws` children of a declaration.
fn extract_throws_clause(&self, node: Node, source: &[u8]) -> Vec<String> {
    let mut thrown = Vec::new();
    for clause in node.children(&mut node.walk()) {
        if clause.kind() != "throws" {
            continue;
        }
        thrown.extend(
            clause
                .children(&mut clause.walk())
                .filter(|entry| {
                    matches!(
                        entry.kind(),
                        "type_identifier" | "scoped_type_identifier" | "generic_type"
                    )
                })
                .map(|entry| self.extract_type(entry, source)),
        );
    }
    thrown
}
/// Builds the `FunctionInfo` for a constructor declaration.
///
/// Returns `None` when the declaration lacks a `name` or `parameters` field.
/// Decorators are: visibility, annotations, `constructor`, then `throws:...`
/// when a throws clause is present. Constructors carry no return type.
fn extract_constructor(&self, node: Node, source: &[u8]) -> Option<FunctionInfo> {
    let name = self
        .node_text(self.child_by_field(node, "name")?, source)
        .to_string();

    let (visibility, _, _, _, _, _, annotations) = self.extract_modifiers(node, source);
    let mut decorators: Vec<String> = visibility.into_iter().chain(annotations).collect();
    decorators.push(String::from("constructor"));

    let thrown = self.extract_throws_clause(node, source);
    if !thrown.is_empty() {
        decorators.push(format!("throws:{}", thrown.join(", ")));
    }

    let params = self.extract_parameters(self.child_by_field(node, "parameters")?, source);

    Some(FunctionInfo {
        name,
        params,
        return_type: None,
        docstring: self.extract_javadoc(node, source),
        is_method: true,
        is_async: false,
        decorators,
        line_number: node.start_position().row + 1,
        end_line_number: Some(node.end_position().row + 1),
        language: String::from("java"),
    })
}
/// Flattens an identifier or scoped identifier into a dotted path such as `a.b.c`.
fn extract_scoped_identifier(&self, node: Node, source: &[u8]) -> String {
    let mut segments: Vec<String> = Vec::new();
    self.collect_identifier_parts(node, source, &mut segments);
    segments.join(".")
}
/// Appends the segments of `node` to `parts`, outermost scope first.
///
/// An `identifier` contributes its own text; a `scoped_identifier`
/// contributes its `scope` (recursively) followed by its `name`. Any other
/// node kind contributes nothing.
fn collect_identifier_parts(&self, node: Node, source: &[u8], parts: &mut Vec<String>) {
    let kind = node.kind();
    if kind == "identifier" {
        parts.push(self.node_text(node, source).to_string());
        return;
    }
    if kind != "scoped_identifier" {
        return;
    }
    if let Some(scope) = self.child_by_field(node, "scope") {
        self.collect_identifier_parts(scope, source, parts);
    }
    if let Some(name) = self.child_by_field(node, "name") {
        parts.push(self.node_text(name, source).to_string());
    }
}
fn build_cfg_from_body(
&self,
body: Node,
source: &[u8],
blocks: &mut HashMap<BlockId, CFGBlock>,
edges: &mut Vec<CFGEdge>,
next_id: &mut usize,
) -> (BlockId, Vec<BlockId>) {
let entry_id = BlockId(*next_id);
*next_id += 1;
let mut statements = Vec::new();
let mut exits = Vec::new();
for child in body.children(&mut body.walk()) {
match child.kind() {
"if_statement" => {
if !statements.is_empty() {
let block = CFGBlock {
id: entry_id,
label: statements.join("; "),
block_type: BlockType::Body,
statements: statements.clone(),
func_calls: Vec::new(),
start_line: body.start_position().row + 1,
end_line: child.start_position().row + 1,
};
blocks.insert(entry_id, block);
statements.clear();
}
let (if_entry, if_exits) =
self.build_cfg_for_if(child, source, blocks, edges, next_id);
edges.push(CFGEdge::from_label(entry_id, if_entry, None));
exits.extend(if_exits);
}
"while_statement" | "for_statement" | "enhanced_for_statement" => {
let (loop_entry, loop_exits) =
self.build_cfg_for_loop(child, source, blocks, edges, next_id);
if !statements.is_empty() {
let block = CFGBlock {
id: entry_id,
label: statements.join("; "),
block_type: BlockType::Body,
statements: statements.clone(),
func_calls: Vec::new(),
start_line: body.start_position().row + 1,
end_line: child.start_position().row + 1,
};
blocks.insert(entry_id, block);
statements.clear();
edges.push(CFGEdge::from_label(entry_id, loop_entry, None));
}
exits.extend(loop_exits);
}
"do_statement" => {
let (do_entry, do_exits) =
self.build_cfg_for_do_while(child, source, blocks, edges, next_id);
if !statements.is_empty() {
let block = CFGBlock {
id: entry_id,
label: statements.join("; "),
block_type: BlockType::Body,
statements: statements.clone(),
func_calls: Vec::new(),
start_line: body.start_position().row + 1,
end_line: child.start_position().row + 1,
};
blocks.insert(entry_id, block);
statements.clear();
edges.push(CFGEdge::from_label(entry_id, do_entry, None));
}
exits.extend(do_exits);
}
"labeled_statement" => {
let (label_entry, label_exits) =
self.build_cfg_for_labeled(child, source, blocks, edges, next_id);
if !statements.is_empty() {
let block = CFGBlock {
id: entry_id,
label: statements.join("; "),
block_type: BlockType::Body,
statements: statements.clone(),
func_calls: Vec::new(),
start_line: body.start_position().row + 1,
end_line: child.start_position().row + 1,
};
blocks.insert(entry_id, block);
statements.clear();
edges.push(CFGEdge::from_label(entry_id, label_entry, None));
}
exits.extend(label_exits);
}
"break_statement" | "continue_statement" => {
statements.push(self.node_text(child, source).trim().to_string());
let stmt_id = BlockId(*next_id);
*next_id += 1;
let block = CFGBlock {
id: stmt_id,
label: statements.join("; "),
block_type: BlockType::Body,
statements: statements.clone(),
func_calls: Vec::new(),
start_line: child.start_position().row + 1,
end_line: child.end_position().row + 1,
};
blocks.insert(stmt_id, block);
exits.push(stmt_id);
statements.clear();
}
"return_statement" => {
statements.push(self.node_text(child, source).trim().to_string());
let return_id = BlockId(*next_id);
*next_id += 1;
let block = CFGBlock {
id: return_id,
label: statements.join("; "),
block_type: BlockType::Body,
statements: statements.clone(),
func_calls: Vec::new(),
start_line: child.start_position().row + 1,
end_line: child.end_position().row + 1,
};
blocks.insert(return_id, block);
exits.push(return_id);
statements.clear();
}
"throw_statement" => {
statements.push(self.node_text(child, source).trim().to_string());
let throw_id = BlockId(*next_id);
*next_id += 1;
let block = CFGBlock {
id: throw_id,
label: statements.join("; "),
block_type: BlockType::Body,
statements: statements.clone(),
func_calls: Vec::new(),
start_line: child.start_position().row + 1,
end_line: child.end_position().row + 1,
};
blocks.insert(throw_id, block);
exits.push(throw_id);
statements.clear();
}
"yield_statement" => {
statements.push(self.node_text(child, source).trim().to_string());
let yield_id = BlockId(*next_id);
*next_id += 1;
let block = CFGBlock {
id: yield_id,
label: statements.join("; "),
block_type: BlockType::Body,
statements: statements.clone(),
func_calls: Vec::new(),
start_line: child.start_position().row + 1,
end_line: child.end_position().row + 1,
};
blocks.insert(yield_id, block);
exits.push(yield_id);
statements.clear();
}
"try_statement" => {
let (try_entry, try_exits) =
self.build_cfg_for_try(child, source, blocks, edges, next_id);
if !statements.is_empty() {
let block = CFGBlock {
id: entry_id,
label: statements.join("; "),
block_type: BlockType::Body,
statements: statements.clone(),
func_calls: Vec::new(),
start_line: body.start_position().row + 1,
end_line: child.start_position().row + 1,
};
blocks.insert(entry_id, block);
statements.clear();
edges.push(CFGEdge::from_label(entry_id, try_entry, None));
}
exits.extend(try_exits);
}
"switch_expression" | "switch_statement" => {
let (switch_entry, switch_exits) =
self.build_cfg_for_switch(child, source, blocks, edges, next_id);
if !statements.is_empty() {
let block = CFGBlock {
id: entry_id,
label: statements.join("; "),
block_type: BlockType::Body,
statements: statements.clone(),
func_calls: Vec::new(),
start_line: body.start_position().row + 1,
end_line: child.start_position().row + 1,
};
blocks.insert(entry_id, block);
statements.clear();
edges.push(CFGEdge::from_label(entry_id, switch_entry, None));
}
exits.extend(switch_exits);
}
"synchronized_statement" => {
let (sync_entry, sync_exits) =
self.build_cfg_for_synchronized(child, source, blocks, edges, next_id);
if !statements.is_empty() {
let block = CFGBlock {
id: entry_id,
label: statements.join("; "),
block_type: BlockType::Body,
statements: statements.clone(),
func_calls: Vec::new(),
start_line: body.start_position().row + 1,
end_line: child.start_position().row + 1,
};
blocks.insert(entry_id, block);
statements.clear();
edges.push(CFGEdge::from_label(entry_id, sync_entry, None));
}
exits.extend(sync_exits);
}
"assert_statement" => {
statements.push(self.node_text(child, source).trim().to_string());
let assert_id = BlockId(*next_id);
*next_id += 1;
let block = CFGBlock {
id: assert_id,
label: statements.join("; "),
block_type: BlockType::Body,
statements: statements.clone(),
func_calls: Vec::new(),
start_line: child.start_position().row + 1,
end_line: child.end_position().row + 1,
};
blocks.insert(assert_id, block);
exits.push(assert_id);
statements.clear();
}
"{" | "}" => {
}
_ if !child.is_named() => {
}
_ => {
let stmt_text = self.node_text(child, source).trim().to_string();
if !stmt_text.is_empty() {
statements.push(stmt_text);
}
}
}
}
if !statements.is_empty() || blocks.get(&entry_id).is_none() {
let block = CFGBlock {
id: entry_id,
label: if statements.is_empty() {
"entry".to_string()
} else {
statements.join("; ")
},
block_type: BlockType::Body,
statements,
func_calls: Vec::new(),
start_line: body.start_position().row + 1,
end_line: body.end_position().row + 1,
};
blocks.insert(entry_id, block);
if exits.is_empty() {
exits.push(entry_id);
}
}
(entry_id, exits)
}
/// Builds the CFG fragment for an `if` statement.
///
/// Creates a condition block labelled `if (<condition text>)`, then builds
/// the `consequence` and `alternative` bodies and links them with `true` /
/// `false` edges. Returns the condition block id and the exits of both
/// branches; without an `alternative`, the condition block itself is an exit.
fn build_cfg_for_if(
    &self,
    node: Node,
    source: &[u8],
    blocks: &mut HashMap<BlockId, CFGBlock>,
    edges: &mut Vec<CFGEdge>,
    next_id: &mut usize,
) -> (BlockId, Vec<BlockId>) {
    let condition_id = BlockId(*next_id);
    *next_id += 1;

    let condition_text = match self.child_by_field(node, "condition") {
        Some(condition) => self.node_text(condition, source).to_string(),
        None => "condition".to_string(),
    };
    let header = format!("if ({})", condition_text);
    // The condition block spans only the line on which the statement starts.
    let header_line = node.start_position().row + 1;
    blocks.insert(
        condition_id,
        CFGBlock {
            id: condition_id,
            label: header.clone(),
            block_type: BlockType::Body,
            statements: vec![header],
            func_calls: Vec::new(),
            start_line: header_line,
            end_line: header_line,
        },
    );

    let mut exits = Vec::new();
    if let Some(consequence) = self.child_by_field(node, "consequence") {
        let (then_entry, then_exits) =
            self.build_cfg_from_body(consequence, source, blocks, edges, next_id);
        edges.push(CFGEdge::from_label(
            condition_id,
            then_entry,
            Some("true".to_string()),
        ));
        exits.extend(then_exits);
    }

    match self.child_by_field(node, "alternative") {
        Some(alternative) => {
            let (else_entry, else_exits) =
                self.build_cfg_from_body(alternative, source, blocks, edges, next_id);
            edges.push(CFGEdge::from_label(
                condition_id,
                else_entry,
                Some("false".to_string()),
            ));
            exits.extend(else_exits);
        }
        // No else branch: control can leave straight from the condition.
        None => exits.push(condition_id),
    }
    (condition_id, exits)
}
/// Builds the CFG fragment for a `while`, `for` or for-each loop.
///
/// Creates a header block, links it to the loop body with a `true` edge,
/// adds an unlabelled edge from every body exit back to the header, and
/// links the header to a new empty `loop exit` block with a `false` edge.
/// Returns the header id and the exit block as the only exit.
fn build_cfg_for_loop(
    &self,
    node: Node,
    source: &[u8],
    blocks: &mut HashMap<BlockId, CFGBlock>,
    edges: &mut Vec<CFGEdge>,
    next_id: &mut usize,
) -> (BlockId, Vec<BlockId>) {
    let header_id = BlockId(*next_id);
    *next_id += 1;

    let header_label = match node.kind() {
        "while_statement" => {
            let condition = match self.child_by_field(node, "condition") {
                Some(cond_node) => self.node_text(cond_node, source).to_string(),
                None => String::new(),
            };
            format!("while ({})", condition)
        }
        "for_statement" => String::from("for loop"),
        "enhanced_for_statement" => String::from("for-each loop"),
        _ => String::from("loop"),
    };
    let header_line = node.start_position().row + 1;
    blocks.insert(
        header_id,
        CFGBlock {
            id: header_id,
            label: header_label.clone(),
            block_type: BlockType::Body,
            statements: vec![header_label],
            func_calls: Vec::new(),
            start_line: header_line,
            end_line: header_line,
        },
    );

    if let Some(body) = self.child_by_field(node, "body") {
        let (body_entry, body_exits) =
            self.build_cfg_from_body(body, source, blocks, edges, next_id);
        edges.push(CFGEdge::from_label(
            header_id,
            body_entry,
            Some("true".to_string()),
        ));
        // Every way out of the body returns to the loop header.
        edges.extend(
            body_exits
                .iter()
                .map(|exit| CFGEdge::from_label(*exit, header_id, None)),
        );
    }

    let exit_id = BlockId(*next_id);
    *next_id += 1;
    let exit_line = node.end_position().row + 1;
    blocks.insert(
        exit_id,
        CFGBlock {
            id: exit_id,
            label: "loop exit".to_string(),
            block_type: BlockType::Body,
            statements: Vec::new(),
            func_calls: Vec::new(),
            start_line: exit_line,
            end_line: exit_line,
        },
    );
    edges.push(CFGEdge::from_label(
        header_id,
        exit_id,
        Some("false".to_string()),
    ));
    (header_id, vec![exit_id])
}
/// Builds the CFG fragment for a `try` statement, including try-with-resources.
///
/// The fragment consists of:
/// - a `try` header block, which is the returned entry;
/// - when a `resource_specification` child exists, an "acquire resources"
///   block reached from the header and a "close resources" block that every
///   exit of the try body flows into;
/// - one sub-graph per `catch_clause`, reached through "exception" edges;
/// - the `finally` body, which all exits accumulated so far flow into.
///
/// Returns the header block id together with the blocks through which control
/// leaves the statement.
fn build_cfg_for_try(
    &self,
    node: Node,
    source: &[u8],
    blocks: &mut HashMap<BlockId, CFGBlock>,
    edges: &mut Vec<CFGEdge>,
    next_id: &mut usize,
) -> (BlockId, Vec<BlockId>) {
    let try_id = BlockId(*next_id);
    *next_id += 1;

    let resource_spec = node
        .children(&mut node.walk())
        .find(|n| n.kind() == "resource_specification");
    let try_label = if let Some(ref res) = resource_spec {
        let resources_text = self.node_text(*res, source);
        format!("try {}", resources_text)
    } else {
        "try".to_string()
    };
    let block = CFGBlock {
        id: try_id,
        label: try_label.clone(),
        block_type: BlockType::Body,
        statements: vec![try_label],
        func_calls: Vec::new(),
        start_line: node.start_position().row + 1,
        end_line: node.start_position().row + 1,
    };
    blocks.insert(try_id, block);

    let mut exits = Vec::new();

    // With resources, the body is entered from the acquisition block rather
    // than directly from the `try` header.
    let effective_entry = if let Some(ref res) = resource_spec {
        let resource_id = BlockId(*next_id);
        *next_id += 1;
        let resource_text = self.extract_resource_declarations(*res, source);
        let resource_block = CFGBlock {
            id: resource_id,
            label: format!("acquire resources: {}", resource_text),
            block_type: BlockType::Body,
            statements: vec![format!("acquire: {}", resource_text)],
            func_calls: Vec::new(),
            start_line: res.start_position().row + 1,
            end_line: res.end_position().row + 1,
        };
        blocks.insert(resource_id, resource_block);
        edges.push(CFGEdge::from_label(try_id, resource_id, Some("acquire".to_string())));
        resource_id
    } else {
        try_id
    };

    if let Some(body) = self.child_by_field(node, "body") {
        let (body_entry, body_exits) =
            self.build_cfg_from_body(body, source, blocks, edges, next_id);
        if resource_spec.is_some() {
            edges.push(CFGEdge::from_label(effective_entry, body_entry, Some("success".to_string())));
        } else {
            edges.push(CFGEdge::from_label(try_id, body_entry, None));
        }
        exits.extend(body_exits);
    }

    // Resources are closed after the body on every normal exit.
    let close_block_id = if resource_spec.is_some() {
        let close_id = BlockId(*next_id);
        *next_id += 1;
        let close_block = CFGBlock {
            id: close_id,
            label: "close resources".to_string(),
            block_type: BlockType::Body,
            statements: vec!["close resources (auto)".to_string()],
            func_calls: Vec::new(),
            start_line: node.end_position().row + 1,
            end_line: node.end_position().row + 1,
        };
        blocks.insert(close_id, close_block);
        for exit in &exits {
            edges.push(CFGEdge::from_label(*exit, close_id, Some("close".to_string())));
        }
        exits = vec![close_id];
        Some(close_id)
    } else {
        None
    };

    for child in node.children(&mut node.walk()) {
        if child.kind() == "catch_clause" {
            if let Some(catch_body) = self.child_by_field(child, "body") {
                let (catch_entry, catch_exits) =
                    self.build_cfg_from_body(catch_body, source, blocks, edges, next_id);
                if resource_spec.is_some() {
                    // Both acquiring and closing a resource may throw.
                    edges.push(CFGEdge::from_label(effective_entry, catch_entry, Some("exception (acquire)".to_string())));
                    if let Some(close_id) = close_block_id {
                        edges.push(CFGEdge::from_label(close_id, catch_entry, Some("exception (close)".to_string())));
                    }
                } else {
                    edges.push(CFGEdge::from_label(try_id, catch_entry, Some("exception".to_string())));
                }
                exits.extend(catch_exits);
            }
        }
    }

    if let Some(finally_clause) = node
        .children(&mut node.walk())
        .find(|n| n.kind() == "finally_clause")
    {
        // The tree-sitter-java grammar declares no `body` field on
        // `finally_clause`; its block is a plain child. Try the field first
        // and fall back to a lookup by node kind so the finally body is not
        // silently dropped from the graph.
        let finally_body = match self.child_by_field(finally_clause, "body") {
            Some(body) => Some(body),
            None => {
                let mut cursor = finally_clause.walk();
                let block_child = finally_clause
                    .children(&mut cursor)
                    .find(|n| n.kind() == "block");
                block_child
            }
        };
        if let Some(finally_body) = finally_body {
            let (finally_entry, finally_exits) =
                self.build_cfg_from_body(finally_body, source, blocks, edges, next_id);
            for exit in &exits {
                edges.push(CFGEdge::from_label(*exit, finally_entry, None));
            }
            exits = finally_exits;
        }
    }

    (try_id, exits)
}
/// Summarises the resources named in a `resource_specification` node.
///
/// For each `resource` child the first direct `identifier` is reported (the
/// declared variable, or the referenced variable for a bare reference).
/// A resource written as a field access (e.g. `try (this.conn)`) has no
/// direct identifier child and is reported by its full source text instead
/// of being dropped.
///
/// Returns the names joined with `", "`, or the literal `"resources"` when
/// nothing could be extracted.
fn extract_resource_declarations(&self, node: Node, source: &[u8]) -> String {
    let mut resources = Vec::new();
    for child in node.children(&mut node.walk()) {
        match child.kind() {
            "resource" => {
                let declared = child
                    .children(&mut child.walk())
                    .find(|n| n.kind() == "identifier");
                if let Some(name) = declared {
                    resources.push(self.node_text(name, source).to_string());
                } else {
                    // Existing-variable form that is not a plain identifier.
                    let accessed = child
                        .children(&mut child.walk())
                        .find(|n| n.kind() == "field_access");
                    if let Some(access) = accessed {
                        resources.push(self.node_text(access, source).to_string());
                    }
                }
            }
            "identifier" => {
                resources.push(self.node_text(child, source).to_string());
            }
            _ => {}
        }
    }
    if resources.is_empty() {
        "resources".to_string()
    } else {
        resources.join(", ")
    }
}
/// Builds the CFG fragment for a `switch`.
///
/// A header block is created for the switch itself; every `switch_rule` and
/// `switch_block_statement_group` child of the body becomes one block reached
/// from the header. Case bodies are not expanded further here. The case
/// blocks are the exits; when there are none, the header is the only exit.
fn build_cfg_for_switch(
    &self,
    node: Node,
    source: &[u8],
    blocks: &mut HashMap<BlockId, CFGBlock>,
    edges: &mut Vec<CFGEdge>,
    next_id: &mut usize,
) -> (BlockId, Vec<BlockId>) {
    let switch_id = BlockId(*next_id);
    *next_id += 1;

    let condition = match self.child_by_field(node, "condition") {
        Some(cond_node) => self.node_text(cond_node, source).to_string(),
        None => "expr".to_string(),
    };
    let header_text = format!("switch ({})", condition);
    let header_line = node.start_position().row + 1;
    blocks.insert(
        switch_id,
        CFGBlock {
            id: switch_id,
            label: header_text.clone(),
            block_type: BlockType::Body,
            statements: vec![header_text],
            func_calls: Vec::new(),
            start_line: header_line,
            end_line: header_line,
        },
    );

    let mut exits = Vec::new();
    if let Some(body) = self.child_by_field(node, "body") {
        for arm in body.children(&mut body.walk()) {
            // Per arm: block label, recorded statement text, edge label.
            let (arm_label, arm_statement, edge_label) = match arm.kind() {
                "switch_rule" => {
                    let arm_text = self.node_text(arm, source);
                    // Label is the part of the first line before the arrow.
                    let head = arm_text
                        .lines()
                        .next()
                        .and_then(|first| first.split("->").next())
                        .map_or_else(|| "case".to_string(), |pattern| pattern.trim().to_string());
                    (head.clone(), arm_text.to_string(), Some(head))
                }
                "switch_block_statement_group" => {
                    let head = self
                        .node_text(arm, source)
                        .lines()
                        .next()
                        .unwrap_or("case")
                        .to_string();
                    (head.clone(), head, None)
                }
                _ => continue,
            };

            let case_id = BlockId(*next_id);
            *next_id += 1;
            blocks.insert(
                case_id,
                CFGBlock {
                    id: case_id,
                    label: arm_label,
                    block_type: BlockType::Body,
                    statements: vec![arm_statement],
                    func_calls: Vec::new(),
                    start_line: arm.start_position().row + 1,
                    end_line: arm.end_position().row + 1,
                },
            );
            edges.push(CFGEdge::from_label(switch_id, case_id, edge_label));
            exits.push(case_id);
        }
    }

    if exits.is_empty() {
        exits.push(switch_id);
    }
    (switch_id, exits)
}
/// Builds the CFG fragment for a `do { ... } while (cond);` loop.
///
/// Shape: `do` block -> body sub-graph -> condition block; the condition
/// loops back to the `do` block on "true" and proceeds to a dedicated
/// "do-while exit" block on "false". The `do` block is the entry and the
/// exit block is the single exit.
fn build_cfg_for_do_while(
    &self,
    node: Node,
    source: &[u8],
    blocks: &mut HashMap<BlockId, CFGBlock>,
    edges: &mut Vec<CFGEdge>,
    next_id: &mut usize,
) -> (BlockId, Vec<BlockId>) {
    let first_line = node.start_position().row + 1;
    let last_line = node.end_position().row + 1;

    let body_id = BlockId(*next_id);
    *next_id += 1;
    blocks.insert(
        body_id,
        CFGBlock {
            id: body_id,
            label: "do".to_string(),
            block_type: BlockType::Body,
            statements: vec!["do".to_string()],
            func_calls: Vec::new(),
            start_line: first_line,
            end_line: first_line,
        },
    );

    // Without a body node the `do` block itself feeds the condition.
    let body_exits = match self.child_by_field(node, "body") {
        Some(body) => {
            let (inner_entry, inner_exits) =
                self.build_cfg_from_body(body, source, blocks, edges, next_id);
            edges.push(CFGEdge::from_label(body_id, inner_entry, None));
            inner_exits
        }
        None => vec![body_id],
    };

    let condition_id = BlockId(*next_id);
    *next_id += 1;
    let condition_text = match self.child_by_field(node, "condition") {
        Some(cond_node) => self.node_text(cond_node, source).to_string(),
        None => "condition".to_string(),
    };
    let condition_label = format!("while ({})", condition_text);
    blocks.insert(
        condition_id,
        CFGBlock {
            id: condition_id,
            label: condition_label.clone(),
            block_type: BlockType::Body,
            statements: vec![condition_label],
            func_calls: Vec::new(),
            start_line: last_line,
            end_line: last_line,
        },
    );
    edges.extend(
        body_exits
            .iter()
            .map(|&exit| CFGEdge::from_label(exit, condition_id, None)),
    );
    edges.push(CFGEdge::from_label(condition_id, body_id, Some("true".to_string())));

    let exit_id = BlockId(*next_id);
    *next_id += 1;
    blocks.insert(
        exit_id,
        CFGBlock {
            id: exit_id,
            label: "do-while exit".to_string(),
            block_type: BlockType::Body,
            statements: vec![],
            func_calls: Vec::new(),
            start_line: last_line,
            end_line: last_line,
        },
    );
    edges.push(CFGEdge::from_label(condition_id, exit_id, Some("false".to_string())));

    (body_id, vec![exit_id])
}
/// Builds the CFG fragment for a labeled statement (`name: statement`).
///
/// A block is created for the label and linked to the sub-graph of the
/// labeled statement. The label block is the entry; the exits are those of
/// the labeled statement, or the label block itself when no statement child
/// is found.
fn build_cfg_for_labeled(
    &self,
    node: Node,
    source: &[u8],
    blocks: &mut HashMap<BlockId, CFGBlock>,
    edges: &mut Vec<CFGEdge>,
    next_id: &mut usize,
) -> (BlockId, Vec<BlockId>) {
    let label_id = BlockId(*next_id);
    *next_id += 1;
    let label_name = node
        .children(&mut node.walk())
        .find(|n| n.kind() == "identifier")
        .map(|n| self.node_text(n, source).to_string())
        .unwrap_or_else(|| "label".to_string());
    let label_block = CFGBlock {
        id: label_id,
        label: format!("{}:", label_name),
        block_type: BlockType::Body,
        statements: vec![format!("{}:", label_name)],
        func_calls: Vec::new(),
        start_line: node.start_position().row + 1,
        end_line: node.start_position().row + 1,
    };
    blocks.insert(label_id, label_block);

    let mut exits = vec![label_id];
    for child in node.children(&mut node.walk()) {
        // Skip the label tokens, and also extra nodes (comments): a comment
        // placed between the label and its statement must not be mistaken
        // for the labeled statement.
        if child.is_extra() || matches!(child.kind(), "identifier" | ":") {
            continue;
        }
        let (stmt_entry, stmt_exits) =
            self.build_cfg_from_body(child, source, blocks, edges, next_id);
        edges.push(CFGEdge::from_label(label_id, stmt_entry, None));
        exits = stmt_exits;
        // Only the first real statement belongs to the label.
        break;
    }
    (label_id, exits)
}
/// Builds the CFG fragment for a `synchronized (lock) { ... }` statement.
///
/// A header block labelled with the lock expression is linked to the body
/// sub-graph through an "acquire" edge. Exits are those of the body, or the
/// header itself when the node has no body.
fn build_cfg_for_synchronized(
    &self,
    node: Node,
    source: &[u8],
    blocks: &mut HashMap<BlockId, CFGBlock>,
    edges: &mut Vec<CFGEdge>,
    next_id: &mut usize,
) -> (BlockId, Vec<BlockId>) {
    let sync_id = BlockId(*next_id);
    *next_id += 1;

    let lock_node = node
        .children(&mut node.walk())
        .find(|n| n.kind() == "parenthesized_expression");
    let lock_text = match lock_node {
        Some(lock) => self.node_text(lock, source),
        None => "(lock)",
    };
    let header = format!("synchronized {}", lock_text);
    let header_line = node.start_position().row + 1;
    blocks.insert(
        sync_id,
        CFGBlock {
            id: sync_id,
            label: header.clone(),
            block_type: BlockType::Body,
            statements: vec![header],
            func_calls: Vec::new(),
            start_line: header_line,
            end_line: header_line,
        },
    );

    let Some(body) = self.child_by_field(node, "body") else {
        return (sync_id, vec![sync_id]);
    };
    let (body_entry, body_exits) =
        self.build_cfg_from_body(body, source, blocks, edges, next_id);
    edges.push(CFGEdge::from_label(sync_id, body_entry, Some("acquire".to_string())));
    (sync_id, body_exits)
}
fn extract_definitions(
&self,
node: Node,
source: &[u8],
definitions: &mut HashMap<String, Vec<usize>>,
edges: &mut Vec<DataflowEdge>,
) {
match node.kind() {
"local_variable_declaration" | "field_declaration" => {
for child in node.children(&mut node.walk()) {
if child.kind() == "variable_declarator" {
if let Some(name_node) = self.child_by_field(child, "name") {
let var_name = self.node_text(name_node, source).to_string();
let line = node.start_position().row + 1;
definitions.entry(var_name.clone()).or_default().push(line);
edges.push(DataflowEdge {
variable: var_name,
from_line: line,
to_line: line,
kind: DataflowKind::Definition,
});
}
}
}
}
"assignment_expression" => {
if let Some(left) = self.child_by_field(node, "left") {
let var_name = self.node_text(left, source).to_string();
let line = node.start_position().row + 1;
definitions.entry(var_name.clone()).or_default().push(line);
edges.push(DataflowEdge {
variable: var_name,
from_line: line,
to_line: line,
kind: DataflowKind::Mutation,
});
}
}
"update_expression" => {
for child in node.children(&mut node.walk()) {
if child.kind() == "identifier" {
let var_name = self.node_text(child, source).to_string();
let line = node.start_position().row + 1;
definitions.entry(var_name.clone()).or_default().push(line);
edges.push(DataflowEdge {
variable: var_name,
from_line: line,
to_line: line,
kind: DataflowKind::Mutation,
});
break;
}
}
}
"compound_assignment_expression" => {
if let Some(left) = self.child_by_field(node, "left") {
let var_name = self.node_text(left, source).to_string();
let line = node.start_position().row + 1;
definitions.entry(var_name.clone()).or_default().push(line);
edges.push(DataflowEdge {
variable: var_name,
from_line: line,
to_line: line,
kind: DataflowKind::Mutation,
});
}
}
"instanceof_expression" => {
let mut found_type = false;
for child in node.children(&mut node.walk()) {
match child.kind() {
"type_identifier" | "generic_type" | "scoped_type_identifier" => {
found_type = true;
}
"identifier" if found_type => {
let var_name = self.node_text(child, source).to_string();
let line = node.start_position().row + 1;
definitions.entry(var_name.clone()).or_default().push(line);
edges.push(DataflowEdge {
variable: var_name,
from_line: line,
to_line: line,
kind: DataflowKind::Definition,
});
break;
}
_ => {}
}
}
}
_ => {}
}
let mut cursor = node.walk();
for child in node.children(&mut cursor) {
self.extract_definitions(child, source, definitions, edges);
}
}
/// Walks `node` recursively and records uses of already-known variables.
///
/// - An `identifier` that is not the `name` of its parent
///   `variable_declarator` / `formal_parameter` and whose text is a key of
///   `definitions` is appended to `uses`; a `Use` edge is added from the
///   latest definition line that is not after the use line, if any.
/// - Each direct `identifier` child of a `return_statement` that names a
///   known variable yields a `Return` self-edge on the statement's line.
fn extract_uses(
    &self,
    node: Node,
    source: &[u8],
    definitions: &HashMap<String, Vec<usize>>,
    uses: &mut HashMap<String, Vec<usize>>,
    edges: &mut Vec<DataflowEdge>,
) {
    match node.kind() {
        "identifier" => {
            let declares_name = match node.parent() {
                Some(parent) => {
                    matches!(parent.kind(), "variable_declarator" | "formal_parameter")
                        && self.child_by_field(parent, "name") == Some(node)
                }
                None => false,
            };
            if !declares_name {
                let var_name = self.node_text(node, source).to_string();
                if let Some(def_lines) = definitions.get(&var_name) {
                    let line = node.start_position().row + 1;
                    uses.entry(var_name.clone()).or_default().push(line);
                    let reaching = def_lines
                        .iter()
                        .copied()
                        .filter(|&def_line| def_line <= line)
                        .max();
                    if let Some(def_line) = reaching {
                        edges.push(DataflowEdge {
                            variable: var_name,
                            from_line: def_line,
                            to_line: line,
                            kind: DataflowKind::Use,
                        });
                    }
                }
            }
        }
        "return_statement" => {
            let line = node.start_position().row + 1;
            let mut cursor = node.walk();
            let returned = node
                .children(&mut cursor)
                .filter(|c| c.kind() == "identifier");
            for ident in returned {
                let var_name = self.node_text(ident, source).to_string();
                if definitions.contains_key(&var_name) {
                    edges.push(DataflowEdge {
                        variable: var_name,
                        from_line: line,
                        to_line: line,
                        kind: DataflowKind::Return,
                    });
                }
            }
        }
        _ => {}
    }

    let mut walker = node.walk();
    for child in node.children(&mut walker) {
        self.extract_uses(child, source, definitions, uses, edges);
    }
}
}
impl Language for Java {
/// Identifier of this language implementation.
fn name(&self) -> &'static str {
    const LANGUAGE_NAME: &str = "java";
    LANGUAGE_NAME
}
/// File extensions (with leading dot) handled by this language.
fn extensions(&self) -> &[&'static str] {
    const EXTENSIONS: &[&str] = &[".java"];
    EXTENSIONS
}
fn parser(&self) -> Result<Parser> {
let mut parser = Parser::new();
parser
.set_language(&tree_sitter_java::LANGUAGE.into())
.map_err(|e| BrrrError::TreeSitter(e.to_string()))?;
Ok(parser)
}
/// Extracts function information from a method or constructor declaration.
///
/// Returns `None` for any other node kind.
fn extract_function(&self, node: Node, source: &[u8]) -> Option<FunctionInfo> {
    let kind = node.kind();
    if kind == "method_declaration" {
        self.extract_method(node, source)
    } else if kind == "constructor_declaration" {
        self.extract_constructor(node, source)
    } else {
        None
    }
}
/// Extracts class-like information from a type or module declaration node.
///
/// Dispatches on the node kind (class, interface, enum, record, annotation
/// type, module) and returns `None` for anything else.
fn extract_class(&self, node: Node, source: &[u8]) -> Option<ClassInfo> {
    let kind = node.kind();
    if kind == "class_declaration" {
        return self.extract_class_declaration(node, source);
    }
    if kind == "interface_declaration" {
        return self.extract_interface_declaration(node, source);
    }
    if kind == "enum_declaration" {
        return self.extract_enum_declaration(node, source);
    }
    if kind == "record_declaration" {
        return self.extract_record_declaration(node, source);
    }
    if kind == "annotation_type_declaration" {
        return self.extract_annotation_type_declaration(node, source);
    }
    if kind == "module_declaration" {
        return self.extract_module_declaration(node, source);
    }
    None
}
/// Collects the package declaration and all import declarations that are
/// direct children of the tree root, in source order.
fn extract_imports(&self, tree: &Tree, source: &[u8]) -> Vec<ImportInfo> {
    let root = tree.root_node();
    let mut cursor = root.walk();
    let imports: Vec<ImportInfo> = root
        .children(&mut cursor)
        .filter_map(|child| match child.kind() {
            "package_declaration" => self.extract_package_declaration(child, source),
            "import_declaration" => self.extract_import(child, source),
            _ => None,
        })
        .collect();
    imports
}
/// Tree-sitter query matching method and constructor declarations;
/// the declared name is captured as `@name`.
fn function_query(&self) -> &'static str {
    const QUERY: &str = r#"[
(method_declaration name: (identifier) @name) @method
(constructor_declaration name: (identifier) @name) @constructor
]"#;
    QUERY
}
/// Tree-sitter query matching class, interface, enum, record and annotation
/// type declarations; the declared name is captured as `@name`.
fn class_query(&self) -> &'static str {
    const QUERY: &str = r#"[
(class_declaration name: (identifier) @name) @class
(interface_declaration name: (identifier) @name) @interface
(enum_declaration name: (identifier) @name) @enum
(record_declaration name: (identifier) @name) @record
(annotation_type_declaration name: (identifier) @name) @annotation_type
]"#;
    QUERY
}
/// Tree-sitter query matching call sites: method invocations, object
/// creation, lambdas whose body is a method invocation, and method
/// references. The called name is captured as `@callee`.
fn call_query(&self) -> &'static str {
    const QUERY: &str = r#"[
(method_invocation name: (identifier) @callee) @call
(object_creation_expression type: (type_identifier) @callee) @call
(lambda_expression
body: (method_invocation name: (identifier) @callee)) @call
(method_reference . (_) "::" (identifier) @callee) @call
]"#;
    QUERY
}
/// Builds the control-flow graph of a method or constructor declaration.
///
/// When the node has no body (or is of another kind) the graph consists of
/// a single block labelled "abstract" that is both entry and exit.
fn build_cfg(&self, node: Node, source: &[u8]) -> Result<CFGInfo> {
    let function_name = match self.child_by_field(node, "name") {
        Some(name_node) => self.node_text(name_node, source).to_string(),
        None => "unknown".to_string(),
    };

    let mut blocks = HashMap::new();
    let mut edges = Vec::new();
    let mut next_id = 0usize;

    let body = if matches!(node.kind(), "method_declaration" | "constructor_declaration") {
        self.child_by_field(node, "body")
    } else {
        None
    };

    let (entry, exits) = match body {
        Some(body_node) => {
            self.build_cfg_from_body(body_node, source, &mut blocks, &mut edges, &mut next_id)
        }
        None => {
            let id = BlockId(0);
            blocks.insert(
                id,
                CFGBlock {
                    id,
                    label: "abstract".to_string(),
                    block_type: BlockType::Body,
                    statements: vec![],
                    func_calls: Vec::new(),
                    start_line: node.start_position().row + 1,
                    end_line: node.end_position().row + 1,
                },
            );
            (id, vec![id])
        }
    };

    let cfg = CFGInfo {
        function_name,
        blocks,
        edges,
        entry,
        exits,
        decision_points: 0,
        comprehension_decision_points: 0,
        nested_cfgs: HashMap::new(),
        is_async: false,
        await_points: 0,
        blocking_calls: Vec::new(),
        ..Default::default()
    };
    Ok(cfg)
}
/// Builds the data-flow graph of a method or constructor declaration.
///
/// Parameters are registered first as definitions with a `Param` self-edge
/// on the parameter's line. This covers regular `formal_parameter` nodes
/// and varargs `spread_parameter` nodes, whose name sits inside a nested
/// `variable_declarator`. The body is then scanned for definitions and,
/// afterwards, for uses of everything that was defined.
fn build_dfg(&self, node: Node, source: &[u8]) -> Result<DFGInfo> {
    let function_name = self
        .child_by_field(node, "name")
        .map(|n| self.node_text(n, source).to_string())
        .unwrap_or_else(|| "unknown".to_string());
    let mut definitions = HashMap::new();
    let mut uses = HashMap::new();
    let mut edges = Vec::new();

    if let Some(params) = self.child_by_field(node, "parameters") {
        for child in params.children(&mut params.walk()) {
            let name_node = match child.kind() {
                "formal_parameter" => self.child_by_field(child, "name"),
                "spread_parameter" => {
                    // `Type... name`: the identifier is the `name` of the
                    // contained variable_declarator, not of the parameter.
                    let mut cursor = child.walk();
                    let declarator = child
                        .children(&mut cursor)
                        .find(|n| n.kind() == "variable_declarator");
                    match declarator {
                        Some(decl) => self.child_by_field(decl, "name"),
                        None => None,
                    }
                }
                _ => None,
            };
            if let Some(name_node) = name_node {
                let var_name = self.node_text(name_node, source).to_string();
                let line = child.start_position().row + 1;
                definitions
                    .entry(var_name.clone())
                    .or_default()
                    .push(line);
                edges.push(DataflowEdge {
                    variable: var_name,
                    from_line: line,
                    to_line: line,
                    kind: DataflowKind::Param,
                });
            }
        }
    }

    if let Some(body) = self.child_by_field(node, "body") {
        // Definitions must be complete before uses are resolved against them.
        self.extract_definitions(body, source, &mut definitions, &mut edges);
        self.extract_uses(body, source, &definitions, &mut uses, &mut edges);
    }
    Ok(DFGInfo::new(function_name, edges, definitions, uses))
}
}
impl Java {
/// Extracts a `ClassInfo` from a `class_declaration` node.
///
/// Decorators are ordered: visibility (if any), annotations, then the
/// `static` / `final` / `abstract` flags and a `generic:<params>` entry.
/// Bases list `extends <superclass>` followed by `implements <interface>`
/// entries. Returns `None` when the node has no `name` field.
fn extract_class_declaration(&self, node: Node, source: &[u8]) -> Option<ClassInfo> {
    let name_node = self.child_by_field(node, "name")?;
    let name = self.node_text(name_node, source).to_string();

    let (visibility, is_static, is_final, is_abstract, _, _, annotations) =
        self.extract_modifiers(node, source);
    let mut decorators: Vec<String> = visibility.into_iter().chain(annotations).collect();
    for (enabled, keyword) in [
        (is_static, "static"),
        (is_final, "final"),
        (is_abstract, "abstract"),
    ] {
        if enabled {
            decorators.push(keyword.to_string());
        }
    }
    if let Some(type_params) = self.extract_type_parameters(node, source) {
        decorators.push(format!("generic:{}", type_params));
    }

    let mut bases: Vec<String> = self
        .extract_superclass(node, source)
        .map(|superclass| format!("extends {}", superclass))
        .into_iter()
        .collect();
    bases.extend(
        self.extract_interfaces(node, source)
            .into_iter()
            .map(|interface| format!("implements {}", interface)),
    );

    let docstring = self.extract_javadoc(node, source);

    let (methods, fields, inner_classes) = match self.child_by_field(node, "body") {
        Some(body) => (
            self.extract_methods(body, source),
            self.extract_fields(body, source),
            self.extract_inner_classes(body, source),
        ),
        None => (Vec::new(), Vec::new(), Vec::new()),
    };

    Some(ClassInfo {
        name,
        bases,
        docstring,
        methods,
        fields,
        inner_classes,
        decorators,
        line_number: node.start_position().row + 1,
        end_line_number: Some(node.end_position().row + 1),
        language: "java".to_string(),
    })
}
/// Extracts the type declarations nested directly inside a type body.
///
/// Handles class, interface, enum, record and annotation type declarations.
/// In an enum, members other than the constants are wrapped in an
/// `enum_body_declarations` node, so that wrapper is descended into;
/// otherwise types nested in an enum would be missed.
fn extract_inner_classes(&self, body_node: Node, source: &[u8]) -> Vec<ClassInfo> {
    let mut inner_classes = Vec::new();
    for child in body_node.children(&mut body_node.walk()) {
        let nested = match child.kind() {
            "class_declaration" => self.extract_class_declaration(child, source),
            "interface_declaration" => self.extract_interface_declaration(child, source),
            "enum_declaration" => self.extract_enum_declaration(child, source),
            "record_declaration" => self.extract_record_declaration(child, source),
            "annotation_type_declaration" => {
                self.extract_annotation_type_declaration(child, source)
            }
            "enum_body_declarations" => {
                // Member section of an enum body: its children are the
                // actual member declarations.
                inner_classes.extend(self.extract_inner_classes(child, source));
                None
            }
            _ => None,
        };
        inner_classes.extend(nested);
    }
    inner_classes
}
/// Extracts a `ClassInfo` from an `interface_declaration` node.
///
/// Decorators start with `"interface"`, followed by the visibility (if any),
/// the annotations and a `generic:<params>` entry. Extended interfaces are
/// reported as `extends <name>` bases. Returns `None` without a `name` field.
fn extract_interface_declaration(&self, node: Node, source: &[u8]) -> Option<ClassInfo> {
    let name_node = self.child_by_field(node, "name")?;
    let name = self.node_text(name_node, source).to_string();

    let (visibility, _, _, _, _, _, annotations) = self.extract_modifiers(node, source);
    let mut decorators = vec!["interface".to_string()];
    decorators.extend(visibility);
    decorators.extend(annotations);
    if let Some(type_params) = self.extract_type_parameters(node, source) {
        decorators.push(format!("generic:{}", type_params));
    }

    let mut bases: Vec<String> = Vec::new();
    for parent in self.extract_extends_interfaces(node, source) {
        bases.push(format!("extends {}", parent));
    }

    let docstring = self.extract_javadoc(node, source);

    let (methods, fields, inner_classes) = match self.child_by_field(node, "body") {
        Some(body) => (
            self.extract_methods(body, source),
            self.extract_fields(body, source),
            self.extract_inner_classes(body, source),
        ),
        None => (Vec::new(), Vec::new(), Vec::new()),
    };

    Some(ClassInfo {
        name,
        bases,
        docstring,
        methods,
        fields,
        inner_classes,
        decorators,
        line_number: node.start_position().row + 1,
        end_line_number: Some(node.end_position().row + 1),
        language: "java".to_string(),
    })
}
/// Extracts a `ClassInfo` from an `enum_declaration` node.
///
/// Decorators start with `"enum"`, followed by the visibility (if any) and
/// the annotations. Implemented interfaces become `implements <name>` bases.
/// The field list holds the enum constants first, then the regular fields.
/// Returns `None` without a `name` field.
fn extract_enum_declaration(&self, node: Node, source: &[u8]) -> Option<ClassInfo> {
    let name_node = self.child_by_field(node, "name")?;
    let name = self.node_text(name_node, source).to_string();

    let (visibility, _, _, _, _, _, annotations) = self.extract_modifiers(node, source);
    let mut decorators = vec!["enum".to_string()];
    decorators.extend(visibility);
    decorators.extend(annotations);

    let mut bases: Vec<String> = Vec::new();
    for interface in self.extract_interfaces(node, source) {
        bases.push(format!("implements {}", interface));
    }

    let docstring = self.extract_javadoc(node, source);

    let (methods, fields, inner_classes) = match self.child_by_field(node, "body") {
        Some(body) => {
            let mut all_fields = self.extract_enum_constants(body, source);
            all_fields.extend(self.extract_fields(body, source));
            let methods = self.extract_methods(body, source);
            let inner_classes = self.extract_inner_classes(body, source);
            (methods, all_fields, inner_classes)
        }
        None => (Vec::new(), Vec::new(), Vec::new()),
    };

    Some(ClassInfo {
        name,
        bases,
        docstring,
        methods,
        fields,
        inner_classes,
        decorators,
        line_number: node.start_position().row + 1,
        end_line_number: Some(node.end_position().row + 1),
        language: "java".to_string(),
    })
}
/// Collects the `enum_constant` children of an enum body as fields,
/// skipping constants for which extraction fails.
fn extract_enum_constants(&self, body_node: Node, source: &[u8]) -> Vec<FieldInfo> {
    let mut cursor = body_node.walk();
    let constants: Vec<FieldInfo> = body_node
        .children(&mut cursor)
        .filter(|child| child.kind() == "enum_constant")
        .filter_map(|constant| self.extract_enum_constant(constant, source))
        .collect();
    constants
}
/// Converts one `enum_constant` node into a `FieldInfo`.
///
/// The name is the first direct `identifier` child (`None` is returned when
/// there is none). Annotations are taken from the `modifiers` child and the
/// text of the `argument_list`, if present, is stored as the default value.
/// The field is always reported as `public static final` with the type
/// `"enum_constant"`.
fn extract_enum_constant(&self, node: Node, source: &[u8]) -> Option<FieldInfo> {
    let name_node = node
        .children(&mut node.walk())
        .find(|n| n.kind() == "identifier")?;
    let name = self.node_text(name_node, source).to_string();

    let modifiers = node
        .children(&mut node.walk())
        .find(|n| n.kind() == "modifiers");
    let annotations = match modifiers {
        Some(modifiers) => {
            let mut cursor = modifiers.walk();
            let found: Vec<_> = modifiers
                .children(&mut cursor)
                .filter(|m| matches!(m.kind(), "marker_annotation" | "annotation"))
                .map(|m| self.extract_annotation(m, source))
                .collect();
            found
        }
        None => Vec::new(),
    };

    let arguments = node
        .children(&mut node.walk())
        .find(|n| n.kind() == "argument_list");
    let default_value = arguments.map(|args| self.node_text(args, source).to_string());

    Some(FieldInfo {
        name,
        field_type: Some("enum_constant".to_string()),
        visibility: Some("public".to_string()),
        is_static: true,
        is_final: true,
        default_value,
        annotations,
        line_number: node.start_position().row + 1,
    })
}
/// Extracts a `ClassInfo` from a `record_declaration` node.
///
/// Decorators start with `"record"`, followed by the visibility (if any),
/// the annotations and a `generic:<params>` entry. Implemented interfaces
/// become `implements <name>` bases. Returns `None` without a `name` field.
fn extract_record_declaration(&self, node: Node, source: &[u8]) -> Option<ClassInfo> {
    let name_node = self.child_by_field(node, "name")?;
    let name = self.node_text(name_node, source).to_string();

    let (visibility, _, _, _, _, _, annotations) = self.extract_modifiers(node, source);
    let mut decorators = vec!["record".to_string()];
    decorators.extend(visibility);
    decorators.extend(annotations);
    if let Some(type_params) = self.extract_type_parameters(node, source) {
        decorators.push(format!("generic:{}", type_params));
    }

    let mut bases: Vec<String> = Vec::new();
    for interface in self.extract_interfaces(node, source) {
        bases.push(format!("implements {}", interface));
    }

    let docstring = self.extract_javadoc(node, source);

    let (methods, fields, inner_classes) = match self.child_by_field(node, "body") {
        Some(body) => (
            self.extract_methods(body, source),
            self.extract_fields(body, source),
            self.extract_inner_classes(body, source),
        ),
        None => (Vec::new(), Vec::new(), Vec::new()),
    };

    Some(ClassInfo {
        name,
        bases,
        docstring,
        methods,
        fields,
        inner_classes,
        decorators,
        line_number: node.start_position().row + 1,
        end_line_number: Some(node.end_position().row + 1),
        language: "java".to_string(),
    })
}
/// Extracts a `ClassInfo` from an `annotation_type_declaration` node.
///
/// Decorators start with `"annotation"`, followed by the visibility (if any)
/// and the annotations. Bases are always empty. Returns `None` without a
/// `name` field.
fn extract_annotation_type_declaration(&self, node: Node, source: &[u8]) -> Option<ClassInfo> {
    let name_node = self.child_by_field(node, "name")?;
    let name = self.node_text(name_node, source).to_string();

    let (visibility, _, _, _, _, _, annotations) = self.extract_modifiers(node, source);
    let mut decorators = vec!["annotation".to_string()];
    decorators.extend(visibility);
    decorators.extend(annotations);

    let docstring = self.extract_javadoc(node, source);

    let (methods, fields, inner_classes) = match self.child_by_field(node, "body") {
        Some(body) => (
            self.extract_methods(body, source),
            self.extract_fields(body, source),
            self.extract_inner_classes(body, source),
        ),
        None => (Vec::new(), Vec::new(), Vec::new()),
    };

    Some(ClassInfo {
        name,
        bases: Vec::new(),
        docstring,
        methods,
        fields,
        inner_classes,
        decorators,
        line_number: node.start_position().row + 1,
        end_line_number: Some(node.end_position().row + 1),
        language: "java".to_string(),
    })
}
/// Extracts a `ClassInfo` describing a `module_declaration` node.
///
/// The module name is the first direct `identifier` / `scoped_identifier`
/// child (`None` is returned when absent). Decorators are `"module"` plus
/// `"open"` for open modules. Each requires / exports / opens / uses /
/// provides directive of the module body is stored, trimmed, as one base.
fn extract_module_declaration(&self, node: Node, source: &[u8]) -> Option<ClassInfo> {
    let name_node = node
        .children(&mut node.walk())
        .find(|n| matches!(n.kind(), "identifier" | "scoped_identifier"))?;
    let name = self.extract_scoped_identifier(name_node, source);

    let mut decorators = vec!["module".to_string()];
    let is_open = node.children(&mut node.walk()).any(|n| n.kind() == "open");
    if is_open {
        decorators.push("open".to_string());
    }

    let mut bases = Vec::new();
    let module_body = node
        .children(&mut node.walk())
        .find(|n| n.kind() == "module_body");
    if let Some(body) = module_body {
        let mut cursor = body.walk();
        let directives = body.children(&mut cursor).filter(|d| {
            matches!(
                d.kind(),
                "requires_module_directive"
                    | "exports_module_directive"
                    | "opens_module_directive"
                    | "uses_module_directive"
                    | "provides_module_directive"
            )
        });
        for directive in directives {
            bases.push(self.node_text(directive, source).trim().to_string());
        }
    }

    let docstring = self.extract_javadoc(node, source);

    Some(ClassInfo {
        name,
        bases,
        docstring,
        methods: Vec::new(),
        fields: Vec::new(),
        inner_classes: Vec::new(),
        decorators,
        line_number: node.start_position().row + 1,
        end_line_number: Some(node.end_position().row + 1),
        language: "java".to_string(),
    })
}
/// Represents a `package_declaration` as an `ImportInfo`.
///
/// The package name becomes the module and the entry is tagged with the
/// alias pair `"package" -> "true"`. Returns `None` when the declaration has
/// no `scoped_identifier` / `identifier` child.
fn extract_package_declaration(&self, node: Node, source: &[u8]) -> Option<ImportInfo> {
    let name_node = node
        .children(&mut node.walk())
        .find(|child| matches!(child.kind(), "scoped_identifier" | "identifier"))?;
    let module = self.extract_scoped_identifier(name_node, source);

    let mut aliases = HashMap::new();
    aliases.insert("package".to_string(), "true".to_string());

    Some(ImportInfo {
        module,
        names: vec![],
        aliases,
        is_from: false,
        level: 0,
        line_number: node.start_position().row + 1,
        visibility: None,
    })
}
/// Converts an `import_declaration` node into an `ImportInfo`.
///
/// - `import a.b.*;` -> module `a.b`, names `["*"]`
/// - `import static a.B.m;` -> module `a.B`, names `["m"]`
/// - `import a.b.C;` -> module `a.b.C`, no names
///
/// Static imports carry the alias pair `"static" -> "true"`; `is_from` is
/// set for static imports and whenever names are present. Returns `None`
/// when no path component is found.
fn extract_import(&self, node: Node, source: &[u8]) -> Option<ImportInfo> {
    let mut is_static = false;
    let mut segments: Vec<String> = Vec::new();
    for part in node.children(&mut node.walk()) {
        let kind = part.kind();
        if kind == "static" {
            is_static = true;
        } else if kind == "scoped_identifier" || kind == "identifier" {
            segments.push(self.extract_scoped_identifier(part, source));
        } else if kind == "asterisk" {
            segments.push("*".to_string());
        }
    }
    if segments.is_empty() {
        return None;
    }

    let full_path = segments.join(".");
    let (module, names) = if full_path.ends_with(".*") {
        (
            full_path.trim_end_matches(".*").to_string(),
            vec!["*".to_string()],
        )
    } else if is_static {
        // The last path segment of a static import is the imported member.
        match full_path.rsplit_once('.') {
            Some((owner, member)) => (owner.to_string(), vec![member.to_string()]),
            None => (full_path.clone(), Vec::new()),
        }
    } else {
        (full_path, Vec::new())
    };

    let mut aliases = HashMap::new();
    if is_static {
        aliases.insert("static".to_string(), "true".to_string());
    }

    Some(ImportInfo {
        is_from: is_static || !names.is_empty(),
        module,
        names,
        aliases,
        level: 0,
        line_number: node.start_position().row + 1,
        visibility: None,
    })
}
}
#[cfg(test)]
mod tests {
use super::*;
/// Parses `code` with the Java grammar; panics if the parser cannot be
/// created or parsing yields no tree.
fn parse_java(code: &str) -> Tree {
    let mut parser = Java.parser().unwrap();
    let tree = parser.parse(code, None);
    tree.unwrap()
}
// A parameterless public method: name, return type and visibility.
#[test]
fn test_extract_simple_method() {
    let code = r#"
public class Test {
public String getName() {
return name;
}
}
"#;
    let source = code.as_bytes();
    let tree = parse_java(code);
    let class_node = tree.root_node().child(0).unwrap();
    let class_body = class_node.child_by_field_name("body").unwrap();
    let method = class_body
        .children(&mut class_body.walk())
        .find(|n| n.kind() == "method_declaration");
    if let Some(method) = method {
        let func = Java.extract_function(method, source).unwrap();
        assert_eq!(func.name, "getName");
        assert_eq!(func.return_type, Some("String".to_string()));
        assert!(func.params.is_empty());
        assert!(func.decorators.contains(&"public".to_string()));
    }
}
// A void method with two parameters: parameters are rendered as "Type name".
#[test]
fn test_extract_method_with_params() {
    let code = r#"
public class Test {
public void setName(String name, int age) {
this.name = name;
}
}
"#;
    let source = code.as_bytes();
    let tree = parse_java(code);
    let class_node = tree.root_node().child(0).unwrap();
    let class_body = class_node.child_by_field_name("body").unwrap();
    let method = class_body
        .children(&mut class_body.walk())
        .find(|n| n.kind() == "method_declaration");
    if let Some(method) = method {
        let func = Java.extract_function(method, source).unwrap();
        assert_eq!(func.name, "setName");
        assert_eq!(func.return_type, Some("void".to_string()));
        assert_eq!(func.params.len(), 2);
        assert_eq!(func.params[0], "String name");
        assert_eq!(func.params[1], "int age");
    }
}
// A static generic method: `static` and a `generic:` decorator are reported.
#[test]
fn test_extract_generic_method() {
    let code = r#"
public class Test {
public static <T> List<T> process(List<T> items) {
return items;
}
}
"#;
    let source = code.as_bytes();
    let tree = parse_java(code);
    let class_node = tree.root_node().child(0).unwrap();
    let class_body = class_node.child_by_field_name("body").unwrap();
    let method = class_body
        .children(&mut class_body.walk())
        .find(|n| n.kind() == "method_declaration");
    if let Some(method) = method {
        let func = Java.extract_function(method, source).unwrap();
        assert_eq!(func.name, "process");
        assert!(func.decorators.contains(&"static".to_string()));
        assert!(func.decorators.iter().any(|d| d.starts_with("generic:")));
    }
}
// A documented class with a superclass, an interface and one method.
#[test]
fn test_extract_class() {
    let code = r#"
/**
* User class.
*/
public class User extends BaseEntity implements Serializable {
public String getName() { return name; }
}
"#;
    let source = code.as_bytes();
    let tree = parse_java(code);
    let root = tree.root_node();
    let declaration = root
        .children(&mut root.walk())
        .find(|n| n.kind() == "class_declaration");
    if let Some(declaration) = declaration {
        let class = Java.extract_class(declaration, source).unwrap();
        assert_eq!(class.name, "User");
        assert!(class.bases.iter().any(|b| b.contains("BaseEntity")));
        assert!(class.bases.iter().any(|b| b.contains("Serializable")));
        assert!(class.docstring.is_some());
        assert_eq!(class.methods.len(), 1);
    }
}
/// An interface declaration is tagged with the `interface` decorator, lists
/// the interface it extends among its bases, and exposes its one method.
#[test]
fn test_extract_interface() {
    let code = r#"
public interface Processor<T> extends Runnable {
void process(T item);
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();

    // Fail loudly when the interface node is missing instead of passing vacuously.
    let mut cursor = root.walk();
    let interface_node = root
        .children(&mut cursor)
        .find(|node| node.kind() == "interface_declaration")
        .expect("fixture should contain an interface_declaration");

    let class = java
        .extract_class(interface_node, code.as_bytes())
        .expect("interface should be extractable");
    assert_eq!(class.name, "Processor");
    assert!(class.decorators.contains(&"interface".to_string()));
    assert!(class.bases.iter().any(|b| b.contains("Runnable")));
    assert_eq!(class.methods.len(), 1);
}
/// An enum declaration is tagged with the `enum` decorator and exposes the
/// method declared after its constants.
#[test]
fn test_extract_enum() {
    let code = r#"
public enum Status {
ACTIVE, INACTIVE;
public String getCode() { return ""; }
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();

    // Fail loudly when the enum node is missing instead of passing vacuously.
    let mut cursor = root.walk();
    let enum_node = root
        .children(&mut cursor)
        .find(|node| node.kind() == "enum_declaration")
        .expect("fixture should contain an enum_declaration");

    let class = java
        .extract_class(enum_node, code.as_bytes())
        .expect("enum should be extractable");
    assert_eq!(class.name, "Status");
    assert!(class.decorators.contains(&"enum".to_string()));
    assert_eq!(class.methods.len(), 1);
}
/// Covers the four import shapes: single type, package wildcard, static
/// member, and static wildcard.
#[test]
fn test_extract_imports() {
    let code = r#"
import java.util.List;
import java.util.*;
import static java.lang.Math.PI;
import static java.lang.Math.*;
public class Test {}
"#;
    let tree = parse_java(code);
    let imports = Java.extract_imports(&tree, code.as_bytes());
    assert_eq!(imports.len(), 4);

    // Name each entry by the import form it is expected to represent.
    let (single_type, package_wildcard, static_member, static_wildcard) =
        (&imports[0], &imports[1], &imports[2], &imports[3]);

    assert_eq!(single_type.module, "java.util.List");
    assert!(single_type.names.is_empty());

    assert_eq!(package_wildcard.module, "java.util");
    assert_eq!(package_wildcard.names, vec!["*"]);

    assert_eq!(static_member.module, "java.lang.Math");
    assert_eq!(static_member.names, vec!["PI"]);
    assert!(static_member.is_from);

    assert_eq!(static_wildcard.module, "java.lang.Math");
    assert_eq!(static_wildcard.names, vec!["*"]);
}
/// A documented constructor is extracted with the class name, no return type,
/// a `constructor` decorator, both parameters, and its docstring.
#[test]
fn test_extract_constructor() {
    let code = r#"
public class User {
/**
* Creates a new user.
*/
public User(String name, int age) {
this.name = name;
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();
    let class_body = root
        .child(0)
        .and_then(|class| class.child_by_field_name("body"))
        .expect("fixture should start with a class that has a body");

    // A missing constructor must fail the test rather than skip the assertions.
    let mut cursor = class_body.walk();
    let constructor = class_body
        .children(&mut cursor)
        .find(|node| node.kind() == "constructor_declaration")
        .expect("fixture should contain a constructor_declaration");

    let func = java
        .extract_function(constructor, code.as_bytes())
        .expect("constructor should be extractable");
    assert_eq!(func.name, "User");
    assert!(func.return_type.is_none());
    assert!(func.decorators.contains(&"constructor".to_string()));
    assert_eq!(func.params.len(), 2);
    assert!(func.docstring.is_some());
}
/// Both annotations on a method show up among its decorators.
#[test]
fn test_extract_annotations() {
    let code = r#"
public class Test {
@Override
@Deprecated
public String toString() {
return "";
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();
    let class_body = root
        .child(0)
        .and_then(|class| class.child_by_field_name("body"))
        .expect("fixture should start with a class that has a body");

    // A missing method must fail the test rather than skip the assertions.
    let mut cursor = class_body.walk();
    let method = class_body
        .children(&mut cursor)
        .find(|node| node.kind() == "method_declaration")
        .expect("fixture should contain a method_declaration");

    let func = java
        .extract_function(method, code.as_bytes())
        .expect("method should be extractable");
    assert!(func.decorators.iter().any(|d| d.contains("Override")));
    assert!(func.decorators.iter().any(|d| d.contains("Deprecated")));
}
/// Building a CFG for a straight-line method yields the method name, at least
/// one block, and at least one exit.
#[test]
fn test_build_simple_cfg() {
    let code = r#"
public class Test {
public int add(int a, int b) {
int sum = a + b;
return sum;
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();
    let class_body = root
        .child(0)
        .and_then(|class| class.child_by_field_name("body"))
        .expect("fixture should start with a class that has a body");

    // A missing method must fail the test rather than skip the assertions.
    let mut cursor = class_body.walk();
    let method = class_body
        .children(&mut cursor)
        .find(|node| node.kind() == "method_declaration")
        .expect("fixture should contain a method_declaration");

    let cfg = java
        .build_cfg(method, code.as_bytes())
        .expect("CFG construction should succeed");
    assert_eq!(cfg.function_name, "add");
    assert!(!cfg.blocks.is_empty());
    assert!(!cfg.exits.is_empty());
}
/// Building a DFG records a definition for the parameter and for each local,
/// and produces at least one dataflow edge.
#[test]
fn test_build_dfg() {
    let code = r#"
public class Test {
public int compute(int x) {
int y = x + 1;
int z = y * 2;
return z;
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();
    let class_body = root
        .child(0)
        .and_then(|class| class.child_by_field_name("body"))
        .expect("fixture should start with a class that has a body");

    // A missing method must fail the test rather than skip the assertions.
    let mut cursor = class_body.walk();
    let method = class_body
        .children(&mut cursor)
        .find(|node| node.kind() == "method_declaration")
        .expect("fixture should contain a method_declaration");

    let dfg = java
        .build_dfg(method, code.as_bytes())
        .expect("DFG construction should succeed");
    assert_eq!(dfg.function_name, "compute");
    for variable in ["x", "y", "z"] {
        assert!(
            dfg.definitions.contains_key(variable),
            "missing definition for `{variable}`"
        );
    }
    assert!(!dfg.edges.is_empty());
}
/// A documented record is tagged with the `record` decorator, lists its
/// implemented interface, keeps its docstring, and exposes its one method.
#[test]
fn test_extract_record() {
    let code = r#"
/**
* Represents a point in 2D space.
*/
public record Point(int x, int y) implements Serializable {
public double distance() {
return Math.sqrt(x * x + y * y);
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();

    // Fail loudly when the record node is missing instead of passing vacuously.
    let mut cursor = root.walk();
    let record_node = root
        .children(&mut cursor)
        .find(|node| node.kind() == "record_declaration")
        .expect("fixture should contain a record_declaration");

    let class = java
        .extract_class(record_node, code.as_bytes())
        .expect("record should be extractable");
    assert_eq!(class.name, "Point");
    assert!(class.decorators.contains(&"record".to_string()));
    assert!(class.bases.iter().any(|b| b.contains("Serializable")));
    assert!(class.docstring.is_some());
    assert_eq!(class.methods.len(), 1);
    assert_eq!(class.methods[0].name, "distance");
}
/// A `sealed` class with a `permits` clause can be extracted and keeps its name.
///
/// NOTE(review): only the name is asserted; the `sealed` modifier and the
/// permitted subclasses are not checked here.
#[test]
fn test_extract_sealed_class() {
    let code = r#"
public sealed class Shape permits Circle, Rectangle, Triangle {
public abstract double area();
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();

    // Fail loudly when the class node is missing instead of passing vacuously.
    let mut cursor = root.walk();
    let class_node = root
        .children(&mut cursor)
        .find(|node| node.kind() == "class_declaration")
        .expect("fixture should contain a class_declaration");

    let class = java
        .extract_class(class_node, code.as_bytes())
        .expect("sealed class should be extractable");
    assert_eq!(class.name, "Shape");
}
/// Smoke test: a method whose body uses `var` locals is still extracted with
/// the right name.
#[test]
fn test_extract_var_type_inference() {
    let code = r#"
public class Test {
public void process() {
var items = new ArrayList<String>();
var count = 0;
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();
    let class_body = root
        .child(0)
        .and_then(|class| class.child_by_field_name("body"))
        .expect("fixture should start with a class that has a body");

    // A missing method must fail the test rather than skip the assertion.
    let mut cursor = class_body.walk();
    let method = class_body
        .children(&mut cursor)
        .find(|node| node.kind() == "method_declaration")
        .expect("fixture should contain a method_declaration");

    let func = java
        .extract_function(method, code.as_bytes())
        .expect("method should be extractable");
    assert_eq!(func.name, "process");
}
/// An `@interface` declaration is tagged with the `annotation` decorator and
/// keeps the Javadoc that precedes its own annotations.
#[test]
fn test_extract_annotation_type() {
    let code = r#"
/**
* Marks a test as integration test.
*/
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.METHOD)
public @interface IntegrationTest {
String value() default "";
int timeout() default 60;
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();

    // Fail loudly when the annotation type is missing instead of passing vacuously.
    let mut cursor = root.walk();
    let annotation_node = root
        .children(&mut cursor)
        .find(|node| node.kind() == "annotation_type_declaration")
        .expect("fixture should contain an annotation_type_declaration");

    let class = java
        .extract_class(annotation_node, code.as_bytes())
        .expect("annotation type should be extractable");
    assert_eq!(class.name, "IntegrationTest");
    assert!(class.decorators.contains(&"annotation".to_string()));
    assert!(class.docstring.is_some());
}
/// A `static { ... }` block appears among the class methods under a name
/// containing `static_init`.
#[test]
fn test_extract_static_initializer() {
    let code = r#"
public class Config {
private static final Map<String, String> DEFAULTS;
static {
DEFAULTS = new HashMap<>();
DEFAULTS.put("key", "value");
}
public String get(String key) {
return DEFAULTS.get(key);
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();

    // Fail loudly when the class node is missing instead of passing vacuously.
    let mut cursor = root.walk();
    let class_node = root
        .children(&mut cursor)
        .find(|node| node.kind() == "class_declaration")
        .expect("fixture should contain a class_declaration");

    let class = java
        .extract_class(class_node, code.as_bytes())
        .expect("class should be extractable");
    assert_eq!(class.name, "Config");
    assert!(class.methods.iter().any(|m| m.name.contains("static_init")));
}
/// A bare `{ ... }` initializer block appears among the class methods under a
/// name containing `instance_init`.
#[test]
fn test_extract_instance_initializer() {
    let code = r#"
public class Counter {
private int count;
{
count = 0;
}
public void increment() {
count++;
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();

    // Fail loudly when the class node is missing instead of passing vacuously.
    let mut cursor = root.walk();
    let class_node = root
        .children(&mut cursor)
        .find(|node| node.kind() == "class_declaration")
        .expect("fixture should contain a class_declaration");

    let class = java
        .extract_class(class_node, code.as_bytes())
        .expect("class should be extractable");
    assert_eq!(class.name, "Counter");
    assert!(class.methods.iter().any(|m| m.name.contains("instance_init")));
}
/// Both the inner class and the static nested class are reported in
/// `inner_classes` of the outer class.
#[test]
fn test_extract_inner_class() {
    let code = r#"
public class Outer {
private int x;
public class Inner {
public void access() {
System.out.println(x);
}
}
public static class StaticNested {
public void process() {}
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();

    // Fail loudly when the outer class is missing instead of passing vacuously.
    let mut cursor = root.walk();
    let outer_node = root
        .children(&mut cursor)
        .find(|node| node.kind() == "class_declaration")
        .expect("fixture should contain a class_declaration");

    let class = java
        .extract_class(outer_node, code.as_bytes())
        .expect("outer class should be extractable");
    assert_eq!(class.name, "Outer");
    assert_eq!(class.inner_classes.len(), 2);
    assert!(class.inner_classes.iter().any(|c| c.name == "Inner"));
    assert!(class.inner_classes.iter().any(|c| c.name == "StaticNested"));
}
/// All four fields are extracted, and visibility, `static`/`final` flags,
/// declared type, and initializer presence are checked on selected fields.
#[test]
fn test_extract_fields() {
    let code = r#"
public class Entity {
public static final int MAX_SIZE = 100;
private String name;
protected int count = 0;
@Deprecated
volatile boolean active;
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();

    // Fail loudly when the class node is missing instead of passing vacuously.
    let mut cursor = root.walk();
    let class_node = root
        .children(&mut cursor)
        .find(|node| node.kind() == "class_declaration")
        .expect("fixture should contain a class_declaration");

    let class = java
        .extract_class(class_node, code.as_bytes())
        .expect("class should be extractable");
    assert_eq!(class.name, "Entity");
    assert_eq!(class.fields.len(), 4);

    let max_size = class
        .fields
        .iter()
        .find(|f| f.name == "MAX_SIZE")
        .expect("field MAX_SIZE should be extracted");
    assert!(max_size.is_static);
    assert!(max_size.is_final);
    assert_eq!(max_size.visibility, Some("public".to_string()));

    let name = class
        .fields
        .iter()
        .find(|f| f.name == "name")
        .expect("field name should be extracted");
    assert_eq!(name.visibility, Some("private".to_string()));
    assert_eq!(name.field_type, Some("String".to_string()));

    let count = class
        .fields
        .iter()
        .find(|f| f.name == "count")
        .expect("field count should be extracted");
    assert_eq!(count.visibility, Some("protected".to_string()));
    assert!(count.default_value.is_some());
}
/// Enum constants are reported as fields; `LOW` is additionally checked to be
/// typed `enum_constant` and flagged static and final.
#[test]
fn test_extract_enum_constants() {
    let code = r#"
public enum Priority {
LOW(1),
MEDIUM(5),
HIGH(10);
private final int value;
Priority(int value) {
this.value = value;
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();

    // Fail loudly when the enum node is missing instead of passing vacuously.
    let mut cursor = root.walk();
    let enum_node = root
        .children(&mut cursor)
        .find(|node| node.kind() == "enum_declaration")
        .expect("fixture should contain an enum_declaration");

    let class = java
        .extract_class(enum_node, code.as_bytes())
        .expect("enum should be extractable");
    assert_eq!(class.name, "Priority");
    for constant in ["LOW", "MEDIUM", "HIGH"] {
        assert!(
            class.fields.iter().any(|f| f.name == constant),
            "missing enum constant `{constant}`"
        );
    }

    let low_field = class
        .fields
        .iter()
        .find(|f| f.name == "LOW")
        .expect("enum constant LOW should be extracted");
    assert_eq!(low_field.field_type, Some("enum_constant".to_string()));
    assert!(low_field.is_static);
    assert!(low_field.is_final);
}
/// The package declaration is returned as the first import entry, with the
/// package path as its module and a `package` key in its aliases.
#[test]
fn test_extract_package_declaration() {
    let code = r#"
package com.example.myapp;
public class MyClass {}
"#;
    let tree = parse_java(code);
    let extracted = Java.extract_imports(&tree, code.as_bytes());
    assert!(!extracted.is_empty());

    let package_entry = &extracted[0];
    assert_eq!(package_entry.module, "com.example.myapp");
    assert!(package_entry.aliases.contains_key("package"));
}
/// CFG construction succeeds for a method that returns an arrow-form `switch`
/// expression, and produces at least one block.
#[test]
fn test_extract_switch_expression() {
    let code = r#"
public class Test {
public String getDay(int day) {
return switch (day) {
case 1, 2, 3, 4, 5 -> "weekday";
case 6, 7 -> "weekend";
default -> "invalid";
};
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();
    let class_body = root
        .child(0)
        .and_then(|class| class.child_by_field_name("body"))
        .expect("fixture should start with a class that has a body");

    // A missing method must fail the test rather than skip the assertions.
    let mut cursor = class_body.walk();
    let method = class_body
        .children(&mut cursor)
        .find(|node| node.kind() == "method_declaration")
        .expect("fixture should contain a method_declaration");

    let cfg = java
        .build_cfg(method, code.as_bytes())
        .expect("CFG construction should succeed");
    assert_eq!(cfg.function_name, "getDay");
    assert!(!cfg.blocks.is_empty());
}
/// The binding introduced by `obj instanceof String s` is recorded as a
/// definition in the DFG.
#[test]
fn test_extract_pattern_matching_instanceof() {
    let code = r#"
public class Test {
public void process(Object obj) {
if (obj instanceof String s) {
System.out.println(s.length());
}
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();
    let class_body = root
        .child(0)
        .and_then(|class| class.child_by_field_name("body"))
        .expect("fixture should start with a class that has a body");

    // A missing method must fail the test rather than skip the assertions.
    let mut cursor = class_body.walk();
    let method = class_body
        .children(&mut cursor)
        .find(|node| node.kind() == "method_declaration")
        .expect("fixture should contain a method_declaration");

    let dfg = java
        .build_dfg(method, code.as_bytes())
        .expect("DFG construction should succeed");
    assert_eq!(dfg.function_name, "process");
    assert!(dfg.definitions.contains_key("s"));
}
/// A method whose body returns a `"""` text block is still extracted with the
/// right name and return type.
#[test]
fn test_extract_text_block() {
    let code = r#"
public class Test {
public String getJson() {
return """
{
"name": "test",
"value": 42
}
""";
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();
    let class_body = root
        .child(0)
        .and_then(|class| class.child_by_field_name("body"))
        .expect("fixture should start with a class that has a body");

    // A missing method must fail the test rather than skip the assertions.
    let mut cursor = class_body.walk();
    let method = class_body
        .children(&mut cursor)
        .find(|node| node.kind() == "method_declaration")
        .expect("fixture should contain a method_declaration");

    let func = java
        .extract_function(method, code.as_bytes())
        .expect("method should be extractable");
    assert_eq!(func.name, "getJson");
    assert_eq!(func.return_type, Some("String".to_string()));
}
/// A `module` declaration is extracted as a class-like entry named after the
/// module, tagged `module`, with `requires` and `exports` directives among
/// its bases.
#[test]
fn test_extract_module_declaration() {
    let code = r#"
module com.example.mymodule {
requires java.base;
requires transitive java.sql;
exports com.example.api;
opens com.example.internal to com.example.test;
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();

    // Fail loudly when the module node is missing instead of passing vacuously.
    let mut cursor = root.walk();
    let module_node = root
        .children(&mut cursor)
        .find(|node| node.kind() == "module_declaration")
        .expect("fixture should contain a module_declaration");

    let class = java
        .extract_class(module_node, code.as_bytes())
        .expect("module should be extractable");
    assert_eq!(class.name, "com.example.mymodule");
    assert!(class.decorators.contains(&"module".to_string()));
    assert!(class.bases.iter().any(|b| b.contains("requires")));
    assert!(class.bases.iter().any(|b| b.contains("exports")));
}
/// Smoke test: a method whose body uses a lambda and a method reference is
/// still extracted with the right name.
#[test]
fn test_extract_lambda_calls() {
    let code = r#"
public class Test {
public void process(List<String> items) {
items.forEach(s -> System.out.println(s));
items.stream().map(String::toUpperCase).collect(Collectors.toList());
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();
    let class_body = root
        .child(0)
        .and_then(|class| class.child_by_field_name("body"))
        .expect("fixture should start with a class that has a body");

    // A missing method must fail the test rather than skip the assertion.
    let mut cursor = class_body.walk();
    let method = class_body
        .children(&mut cursor)
        .find(|node| node.kind() == "method_declaration")
        .expect("fixture should contain a method_declaration");

    let func = java
        .extract_function(method, code.as_bytes())
        .expect("method should be extractable");
    assert_eq!(func.name, "process");
}
/// A method with a `throws` clause gets a decorator containing `throws:`.
#[test]
fn test_extract_throws_clause() {
    let code = r#"
public class Test {
public void riskyOperation() throws IOException, SQLException {
// implementation
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();
    let class_body = root
        .child(0)
        .and_then(|class| class.child_by_field_name("body"))
        .expect("fixture should start with a class that has a body");

    // A missing method must fail the test rather than skip the assertions.
    let mut cursor = class_body.walk();
    let method = class_body
        .children(&mut cursor)
        .find(|node| node.kind() == "method_declaration")
        .expect("fixture should contain a method_declaration");

    let func = java
        .extract_function(method, code.as_bytes())
        .expect("method should be extractable");
    assert_eq!(func.name, "riskyOperation");
    assert!(func.decorators.iter().any(|d| d.contains("throws:")));
}
/// A class with a bounded type parameter gets a decorator containing `generic:`.
#[test]
fn test_extract_generics_with_bounds() {
    let code = r#"
public class Repository<T extends Entity & Serializable> {
public <U extends Comparable<U>> List<U> sort(List<U> items) {
return items;
}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let root = tree.root_node();

    // Fail loudly when the class node is missing instead of passing vacuously.
    let mut cursor = root.walk();
    let class_node = root
        .children(&mut cursor)
        .find(|node| node.kind() == "class_declaration")
        .expect("fixture should contain a class_declaration");

    let class = java
        .extract_class(class_node, code.as_bytes())
        .expect("class should be extractable");
    assert_eq!(class.name, "Repository");
    assert!(class.decorators.iter().any(|d| d.contains("generic:")));
}
/// Each of the three methods taking a wildcard-typed list (`? extends`,
/// `? super`, and unbounded `?`) is extracted with exactly one parameter.
#[test]
fn test_extract_wildcard_types() {
    let code = r#"
public class Test {
public void processUpper(List<? extends Number> items) {}
public void processLower(List<? super Integer> items) {}
public void processUnbounded(List<?> items) {}
}
"#;
    let tree = parse_java(code);
    let java = Java;
    let class_body = tree
        .root_node()
        .child(0)
        .unwrap()
        .child_by_field_name("body")
        .unwrap();

    // Walk the class body once, checking every method and tallying how many were seen.
    let mut seen_methods = 0;
    let mut cursor = class_body.walk();
    for member in class_body.children(&mut cursor) {
        if member.kind() != "method_declaration" {
            continue;
        }
        let func = java.extract_function(member, code.as_bytes()).unwrap();
        assert!(func.params.len() == 1);
        seen_methods += 1;
    }
    assert_eq!(seen_methods, 3);
}
}