use crate::flow::cfg::{BasicBlock, CFG};
use crate::flow::dataflow::{DataflowResult, Direction, TransferFunction, find_node_by_id};
use crate::semantics::LanguageSemantics;
use std::collections::{HashMap, HashSet};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Definition {
pub var_name: String,
pub node_id: usize,
pub line: usize,
pub origin: DefOrigin,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum DefOrigin {
Literal,
Parameter(usize),
FunctionCall(String),
MemberAccess(String),
Variable(String),
Expression,
Unknown,
}
pub struct ReachingDefsTransfer {
pub semantics: &'static LanguageSemantics,
}
impl TransferFunction<Definition> for ReachingDefsTransfer {
fn transfer(
&self,
block: &BasicBlock,
input: &HashSet<Definition>,
_cfg: &CFG,
source: &[u8],
tree: &tree_sitter::Tree,
) -> HashSet<Definition> {
let mut state = input.clone();
for &stmt_node_id in &block.statements {
if let Some(node) = find_node_by_id(tree, stmt_node_id) {
self.process_statement(node, source, &mut state);
}
}
state
}
}
impl ReachingDefsTransfer {
pub fn new(semantics: &'static LanguageSemantics) -> Self {
Self { semantics }
}
fn process_statement(
&self,
node: tree_sitter::Node,
source: &[u8],
state: &mut HashSet<Definition>,
) {
let kind = node.kind();
let sem = self.semantics;
if sem.is_variable_declaration(kind) {
if let Some((var_name, origin, line)) = self.extract_definition(node, source) {
state.retain(|d| d.var_name != var_name);
state.insert(Definition {
var_name,
node_id: node.id(),
line,
origin,
});
}
}
if sem.is_assignment(kind) || sem.is_augmented_assignment(kind) {
if let Some((var_name, origin, line)) = self.extract_assignment(node, source) {
state.retain(|d| d.var_name != var_name);
state.insert(Definition {
var_name,
node_id: node.id(),
line,
origin,
});
}
}
let mut cursor = node.walk();
for child in node.named_children(&mut cursor) {
if !sem.is_function_def(child.kind()) {
self.process_statement(child, source, state);
}
}
}
fn extract_definition(
&self,
node: tree_sitter::Node,
source: &[u8],
) -> Option<(String, DefOrigin, usize)> {
let sem = self.semantics;
let (name_node, value_node) = match node.kind() {
"variable_declarator" => (
node.child_by_field_name("name"),
node.child_by_field_name("value"),
),
"let_declaration" => (
node.child_by_field_name("pattern"),
node.child_by_field_name("value"),
),
"short_var_declaration" => {
let left = node.child_by_field_name("left");
let right = node.child_by_field_name("right");
if let (Some(l), Some(r)) = (left, right) {
let name = if l.kind() == "expression_list" {
l.named_child(0)
} else {
Some(l)
};
let value = if r.kind() == "expression_list" {
r.named_child(0)
} else {
Some(r)
};
(name, value)
} else {
(None, None)
}
}
"assignment" => (
node.child_by_field_name("left"),
node.child_by_field_name("right"),
),
"local_variable_declaration" => {
let mut cursor = node.walk();
let declarator = node
.named_children(&mut cursor)
.find(|c| c.kind() == "variable_declarator");
if let Some(d) = declarator {
(
d.child_by_field_name("name"),
d.child_by_field_name("value"),
)
} else {
(None, None)
}
}
_ => {
(
node.child_by_field_name(sem.name_field)
.or_else(|| node.child_by_field_name(sem.left_field)),
node.child_by_field_name(sem.value_field)
.or_else(|| node.child_by_field_name(sem.right_field)),
)
}
};
let name = name_node?;
if !sem.is_identifier(name.kind()) && name.kind() != "identifier" {
return None;
}
let name_str = name.utf8_text(source).ok()?.to_string();
let name_str = name_str.trim_start_matches("mut ").trim().to_string();
let origin = if let Some(val) = value_node {
self.classify_origin(val, source)
} else {
DefOrigin::Unknown
};
Some((name_str, origin, node.start_position().row + 1))
}
fn extract_assignment(
&self,
node: tree_sitter::Node,
source: &[u8],
) -> Option<(String, DefOrigin, usize)> {
let sem = self.semantics;
let left = node.child_by_field_name(sem.left_field)?;
let right = node.child_by_field_name(sem.right_field)?;
if !sem.is_identifier(left.kind()) && left.kind() != "identifier" {
return None;
}
let name = left.utf8_text(source).ok()?.to_string();
let origin = self.classify_origin(right, source);
Some((name, origin, node.start_position().row + 1))
}
fn classify_origin(&self, node: tree_sitter::Node, source: &[u8]) -> DefOrigin {
let sem = self.semantics;
let kind = node.kind();
if sem.is_literal(kind) {
DefOrigin::Literal
} else if sem.is_call(kind) {
let func_name = node
.child_by_field_name(sem.function_field)
.and_then(|f| f.utf8_text(source).ok())
.unwrap_or("")
.to_string();
DefOrigin::FunctionCall(func_name)
} else if sem.is_member_access(kind) {
let full_path = node.utf8_text(source).ok().unwrap_or("").to_string();
DefOrigin::MemberAccess(full_path)
} else if sem.is_identifier(kind) || kind == "identifier" {
let name = node.utf8_text(source).ok().unwrap_or("").to_string();
DefOrigin::Variable(name)
} else if sem.is_binary_expression(kind) || kind.contains("concatenat") {
DefOrigin::Expression
} else {
DefOrigin::Unknown
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Use {
pub var_name: String,
pub node_id: usize,
pub line: usize,
}
#[derive(Debug, Default)]
pub struct DefUseChains {
pub def_to_uses: HashMap<Definition, Vec<Use>>,
pub use_to_defs: HashMap<Use, Vec<Definition>>,
}
impl DefUseChains {
pub fn build(
reaching_defs: &DataflowResult<Definition>,
cfg: &CFG,
tree: &tree_sitter::Tree,
source: &[u8],
semantics: &LanguageSemantics,
) -> Self {
let mut def_to_uses: HashMap<Definition, Vec<Use>> = HashMap::new();
let mut use_to_defs: HashMap<Use, Vec<Definition>> = HashMap::new();
for defs in reaching_defs.block_entry.values() {
for def in defs {
def_to_uses.entry(def.clone()).or_default();
}
}
for defs in reaching_defs.block_exit.values() {
for def in defs {
def_to_uses.entry(def.clone()).or_default();
}
}
for block in &cfg.blocks {
if !block.reachable {
continue;
}
let reaching = reaching_defs
.block_entry
.get(&block.id)
.cloned()
.unwrap_or_default();
for &stmt_node_id in &block.statements {
if let Some(node) = find_node_by_id(tree, stmt_node_id) {
let uses = Self::extract_uses(node, source, semantics);
for use_info in uses {
let matching_defs: Vec<Definition> = reaching
.iter()
.filter(|d| d.var_name == use_info.var_name)
.cloned()
.collect();
for def in &matching_defs {
def_to_uses
.entry(def.clone())
.or_default()
.push(use_info.clone());
}
if !matching_defs.is_empty() {
use_to_defs.insert(use_info, matching_defs);
}
}
}
}
}
Self {
def_to_uses,
use_to_defs,
}
}
fn extract_uses(
node: tree_sitter::Node,
source: &[u8],
semantics: &LanguageSemantics,
) -> Vec<Use> {
let mut uses = Vec::new();
Self::collect_uses(node, source, semantics, &mut uses, false);
uses
}
fn collect_uses(
node: tree_sitter::Node,
source: &[u8],
semantics: &LanguageSemantics,
uses: &mut Vec<Use>,
is_lhs: bool,
) {
let kind = node.kind();
if (semantics.is_identifier(kind) || kind == "identifier") && !is_lhs {
if let Ok(name) = node.utf8_text(source) {
uses.push(Use {
var_name: name.to_string(),
node_id: node.id(),
line: node.start_position().row + 1,
});
}
return;
}
if semantics.is_assignment(kind)
|| semantics.is_variable_declaration(kind)
|| semantics.is_augmented_assignment(kind)
{
if let Some(left) = node.child_by_field_name(semantics.left_field) {
Self::collect_uses(left, source, semantics, uses, true);
}
if let Some(right) = node.child_by_field_name(semantics.right_field) {
Self::collect_uses(right, source, semantics, uses, false);
}
if let Some(value) = node.child_by_field_name(semantics.value_field) {
Self::collect_uses(value, source, semantics, uses, false);
}
if semantics.is_augmented_assignment(kind) {
if let Some(left) = node.child_by_field_name(semantics.left_field) {
Self::collect_uses(left, source, semantics, uses, false);
}
}
return;
}
if semantics.is_function_def(kind) {
return;
}
let mut cursor = node.walk();
for child in node.named_children(&mut cursor) {
Self::collect_uses(child, source, semantics, uses, is_lhs);
}
}
pub fn is_dead_store(&self, def: &Definition) -> bool {
self.def_to_uses
.get(def)
.map_or(true, |uses| uses.is_empty())
}
pub fn dead_stores(&self) -> Vec<&Definition> {
self.def_to_uses
.iter()
.filter(|(_, uses)| uses.is_empty())
.map(|(def, _)| def)
.collect()
}
pub fn definitions_count(&self, use_info: &Use) -> usize {
self.use_to_defs.get(use_info).map_or(0, |defs| defs.len())
}
pub fn uses_of(&self, def: &Definition) -> Vec<&Use> {
self.def_to_uses
.get(def)
.map(|uses| uses.iter().collect())
.unwrap_or_default()
}
pub fn defs_reaching(&self, use_info: &Use) -> Vec<&Definition> {
self.use_to_defs
.get(use_info)
.map(|defs| defs.iter().collect())
.unwrap_or_default()
}
}
pub fn initial_definitions(
function_node: tree_sitter::Node,
source: &[u8],
semantics: &LanguageSemantics,
) -> HashSet<Definition> {
let mut defs = HashSet::new();
if let Some(params) = function_node.child_by_field_name(semantics.parameters_field) {
let mut cursor = params.walk();
let mut index = 0;
for child in params.named_children(&mut cursor) {
let name = child
.child_by_field_name(semantics.name_field)
.or(
if semantics.is_identifier(child.kind()) || child.kind() == "identifier" {
Some(child)
} else {
None
},
)
.and_then(|n| n.utf8_text(source).ok())
.map(|s| s.to_string());
if let Some(name) = name {
if name == "self" || name == "cls" {
continue;
}
defs.insert(Definition {
var_name: name,
node_id: child.id(),
line: child.start_position().row + 1,
origin: DefOrigin::Parameter(index),
});
index += 1;
}
}
}
defs
}
pub fn analyze_reaching_definitions(
cfg: &CFG,
tree: &tree_sitter::Tree,
source: &[u8],
semantics: &'static LanguageSemantics,
) -> (DataflowResult<Definition>, DefUseChains) {
let transfer = ReachingDefsTransfer::new(semantics);
let result = super::dataflow::solve(cfg, Direction::Forward, &transfer, source, tree);
let chains = DefUseChains::build(&result, cfg, tree, source, semantics);
(result, chains)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::flow::cfg::CFG;
use rma_common::Language;
use rma_parser::ParserEngine;
use std::path::Path;
fn parse_js(code: &str) -> rma_parser::ParsedFile {
let config = rma_common::RmaConfig::default();
let parser = ParserEngine::new(config);
parser
.parse_file(Path::new("test.js"), code)
.expect("parse failed")
}
#[test]
fn test_simple_definition() {
let code = "const x = 1;";
let parsed = parse_js(code);
let cfg = CFG::build(&parsed, Language::JavaScript);
let semantics = LanguageSemantics::for_language(Language::JavaScript);
let (result, _chains) =
analyze_reaching_definitions(&cfg, &parsed.tree, code.as_bytes(), semantics);
let all_defs: HashSet<_> = result.block_exit.values().flat_map(|s| s.iter()).collect();
assert!(!all_defs.is_empty());
}
#[test]
fn test_def_kill() {
let code = r#"
let x = 1;
x = 2;
"#;
let parsed = parse_js(code);
let cfg = CFG::build(&parsed, Language::JavaScript);
let semantics = LanguageSemantics::for_language(Language::JavaScript);
let (result, _chains) =
analyze_reaching_definitions(&cfg, &parsed.tree, code.as_bytes(), semantics);
let final_defs: HashSet<_> = result
.block_exit
.values()
.flat_map(|s| s.iter())
.filter(|d| d.var_name == "x")
.collect();
assert_eq!(final_defs.len(), 1);
}
#[test]
fn test_def_origin_literal() {
let code = "const x = 42;";
let parsed = parse_js(code);
let cfg = CFG::build(&parsed, Language::JavaScript);
let semantics = LanguageSemantics::for_language(Language::JavaScript);
let (result, _) =
analyze_reaching_definitions(&cfg, &parsed.tree, code.as_bytes(), semantics);
let defs: Vec<_> = result
.block_exit
.values()
.flat_map(|s| s.iter())
.filter(|d| d.var_name == "x")
.collect();
assert!(!defs.is_empty());
assert!(matches!(defs[0].origin, DefOrigin::Literal));
}
#[test]
fn test_def_origin_function_call() {
let code = "const x = getData();";
let parsed = parse_js(code);
let cfg = CFG::build(&parsed, Language::JavaScript);
let semantics = LanguageSemantics::for_language(Language::JavaScript);
let (result, _) =
analyze_reaching_definitions(&cfg, &parsed.tree, code.as_bytes(), semantics);
let defs: Vec<_> = result
.block_exit
.values()
.flat_map(|s| s.iter())
.filter(|d| d.var_name == "x")
.collect();
assert!(!defs.is_empty());
assert!(matches!(defs[0].origin, DefOrigin::FunctionCall(_)));
}
#[test]
fn test_dead_store_detection() {
let code = r#"
let x = 1;
x = 2;
console.log(x);
"#;
let parsed = parse_js(code);
let cfg = CFG::build(&parsed, Language::JavaScript);
let semantics = LanguageSemantics::for_language(Language::JavaScript);
let (_, chains) =
analyze_reaching_definitions(&cfg, &parsed.tree, code.as_bytes(), semantics);
let dead = chains.dead_stores();
let dead_x: Vec<_> = dead.iter().filter(|d| d.var_name == "x").collect();
assert!(
!dead_x.is_empty(),
"Should detect dead store for first x assignment"
);
}
#[test]
fn test_def_use_chain() {
let code = r#"
let x = 1;
console.log(x);
"#;
let parsed = parse_js(code);
let cfg = CFG::build(&parsed, Language::JavaScript);
let semantics = LanguageSemantics::for_language(Language::JavaScript);
let (_, chains) =
analyze_reaching_definitions(&cfg, &parsed.tree, code.as_bytes(), semantics);
let x_defs: Vec<_> = chains
.def_to_uses
.keys()
.filter(|d| d.var_name == "x")
.collect();
assert!(!x_defs.is_empty());
let uses = chains.uses_of(x_defs[0]);
let _ = uses; }
}