#![allow(clippy::collapsible_if, clippy::redundant_closure)]
use std::collections::{HashMap, HashSet};
use super::const_prop::ConstLattice;
use super::ir::*;
use crate::cfg::Cfg;
use crate::symbol::Lang;
/// Output of the static-map analysis performed by `analyze`.
#[derive(Clone, Debug, Default)]
pub struct StaticMapResult {
/// Keyed by the SSA value defined by a recognised map lookup call. The
/// value is the sorted, de-duplicated list of strings that lookup can
/// yield: every string literal inserted into the map plus the lookup's
/// literal fallback.
pub finite_string_values: HashMap<SsaValue, Vec<String>>,
}
impl StaticMapResult {
pub fn empty() -> Self {
Self::default()
}
pub fn is_empty(&self) -> bool {
self.finite_string_values.is_empty()
}
}
/// Returns `true` when `callee` is a `::`-separated path whose last segment is
/// `new` and whose second-to-last segment is `HashMap` or `BTreeMap`.
///
/// Leading path segments are ignored, so both `HashMap::new` and
/// `std::collections::HashMap::new` are accepted.
fn is_rust_map_constructor(callee: &str) -> bool {
    // Walk the path right-to-left: the first segment is the function name,
    // the second one is the type it is called on.
    let mut segments = callee.rsplit("::");
    let function = segments.next().unwrap_or(callee);
    let owner = segments.next().unwrap_or("");
    function == "new" && matches!(owner, "HashMap" | "BTreeMap")
}
/// How a call on a tracked map uses that map, as decided by
/// `classify_map_use` from the callee text.
#[derive(Clone, Debug, PartialEq, Eq)]
enum MapUse {
/// The callee is exactly `<map>.insert`.
Insert,
/// The callee is `<map>.get(..)`, followed by zero or more of `.copied()`,
/// `.cloned()`, `.as_deref()` or `.as_ref()`, and ending in `.unwrap_or`.
StaticLookup,
/// The callee is `<map>.contains_key`, `.len`, `.is_empty` or `.clear`;
/// `analyze` records nothing for these calls.
ReadOnly,
/// Any other callee shape; `analyze` marks the map as invalid.
Escape,
}
/// Classifies the textual `callee` of a call made on the map named `map_var`.
///
/// The callee must start with `map_var` followed by `.`; whatever comes after
/// the dot decides the classification. Callees that do not start that way, or
/// whose method part is not recognised, are reported as [`MapUse::Escape`].
fn classify_map_use(callee: &str, map_var: &str) -> MapUse {
    let method = match callee
        .strip_prefix(map_var)
        .and_then(|tail| tail.strip_prefix('.'))
    {
        Some(method) => method,
        // The call is not textually a method call on this map variable.
        None => return MapUse::Escape,
    };

    match method {
        "insert" => MapUse::Insert,
        "contains_key" | "len" | "is_empty" | "clear" => MapUse::ReadOnly,
        other => {
            // `get(<args>)` followed only by identity adapters and a final
            // `.unwrap_or` is the one lookup shape that is understood.
            let is_static_lookup = other
                .strip_prefix("get(")
                .and_then(|after_open| scan_past_balanced_parens(after_open))
                .map_or(false, |chain| is_identity_chain_ending_in_unwrap_or(chain));
            if is_static_lookup {
                MapUse::StaticLookup
            } else {
                MapUse::Escape
            }
        }
    }
}
/// Given the text that follows an already-consumed `(`, returns the slice
/// after the `)` that closes it, or `None` if that parenthesis never closes.
///
/// Nested parentheses are counted; every other byte is skipped without
/// interpretation.
fn scan_past_balanced_parens(s: &str) -> Option<&str> {
    // One level is already open: the caller stripped the opening `(`.
    let mut open: i32 = 1;
    for (idx, byte) in s.bytes().enumerate() {
        match byte {
            b'(' => open += 1,
            b')' => {
                open -= 1;
                if open == 0 {
                    // `)` is a single ASCII byte, so `idx + 1` is a char boundary.
                    return Some(&s[idx + 1..]);
                }
            }
            _ => {}
        }
    }
    None
}
/// Returns `true` when `s` consists of zero or more of `.copied()`,
/// `.cloned()`, `.as_deref()` and `.as_ref()` followed by exactly
/// `.unwrap_or` and nothing else.
fn is_identity_chain_ending_in_unwrap_or(mut s: &str) -> bool {
    const ADAPTERS: [&str; 4] = [".copied()", ".cloned()", ".as_deref()", ".as_ref()"];
    while s != ".unwrap_or" {
        let remaining = s;
        // Peel one adapter off the front; anything else ends the chain.
        match ADAPTERS
            .iter()
            .find_map(|adapter| remaining.strip_prefix(*adapter))
        {
            Some(rest) => s = rest,
            None => return false,
        }
    }
    true
}
/// Follows `aliases` starting from `v` and returns the value the chain ends on.
///
/// The walk stops at the first value with no entry or with an entry pointing
/// at itself, and gives up after 64 hops so a cyclic table cannot loop forever.
fn resolve_alias(v: SsaValue, aliases: &HashMap<SsaValue, SsaValue>) -> SsaValue {
    const MAX_HOPS: usize = 64;
    let mut current = v;
    let mut hops = 0;
    while hops < MAX_HOPS {
        match aliases.get(&current).copied() {
            Some(target) if target != current => current = target,
            _ => break,
        }
        hops += 1;
    }
    current
}
/// Computes, for Rust bodies, the finite set of strings that lookups on
/// locally constructed maps can yield.
///
/// The analysis runs in three passes over `body`:
///
/// 1. Every named value defined by a `HashMap::new` / `BTreeMap::new` call
///    becomes a candidate map.
/// 2. Single-operand assignments whose source resolves to a candidate are
///    recorded as aliases of that candidate.
/// 3. Every use of a candidate (directly or through an alias) is inspected.
///    `insert` calls contribute their second-argument string literal,
///    recognised `get(..)…unwrap_or` calls are remembered as lookups together
///    with their literal fallback, and any other use (phi operand, call
///    argument, multi-operand assignment, return value, unrecognised method,
///    or an `insert` without a literal value) marks the map invalid.
///
/// For each lookup on a map that stayed valid and received at least one
/// literal insert, the result maps the lookup's SSA value to the sorted,
/// de-duplicated list of inserted literals plus the fallback.
///
/// Returns an empty result when `lang` is not Rust or no candidate map is
/// found. `_const_values` is currently unused.
pub fn analyze(
    body: &SsaBody,
    cfg: &Cfg,
    lang: Option<Lang>,
    _const_values: &HashMap<SsaValue, ConstLattice>,
) -> StaticMapResult {
    if lang != Some(Lang::Rust) {
        return StaticMapResult::empty();
    }

    // Pass 1: collect map constructor results that are bound to a named variable.
    let mut candidates: HashMap<SsaValue, String> = HashMap::new();
    for block in &body.blocks {
        for inst in block.phis.iter().chain(block.body.iter()) {
            let is_constructor = match &inst.op {
                SsaOp::Call { callee, .. } => is_rust_map_constructor(callee),
                _ => false,
            };
            if !is_constructor {
                continue;
            }
            match inst.var_name.as_deref() {
                Some(name) if !name.is_empty() => {
                    candidates.insert(inst.value, name.to_string());
                }
                _ => {}
            }
        }
    }
    if candidates.is_empty() {
        return StaticMapResult::empty();
    }

    // Pass 2: plain copies of a candidate are aliases of that candidate.
    let mut aliases: HashMap<SsaValue, SsaValue> = HashMap::new();
    for inst in body.blocks.iter().flat_map(|block| block.body.iter()) {
        match &inst.op {
            SsaOp::Assign(uses) if uses.len() == 1 => {
                let source = resolve_alias(uses[0], &aliases);
                if candidates.contains_key(&source) {
                    aliases.insert(inst.value, source);
                }
            }
            _ => {}
        }
    }

    // Maps any value to the candidate map it stands for, if there is one.
    let tracked_root = |v: SsaValue| -> Option<SsaValue> {
        Some(resolve_alias(v, &aliases)).filter(|root| candidates.contains_key(root))
    };

    // Pass 3: classify every use of a candidate map.
    let mut inserted: HashMap<SsaValue, HashSet<String>> = candidates
        .keys()
        .map(|&candidate| (candidate, HashSet::new()))
        .collect();
    let mut invalid: HashSet<SsaValue> = HashSet::new();
    let mut lookups: Vec<(SsaValue, SsaValue, String)> = Vec::new();

    for block in &body.blocks {
        for inst in block.phis.iter().chain(block.body.iter()) {
            match &inst.op {
                // A map flowing into a phi is no longer a single local object.
                SsaOp::Phi(operands) => {
                    for (_, v) in operands {
                        invalid.extend(tracked_root(*v));
                    }
                }
                SsaOp::Call {
                    callee,
                    args,
                    receiver,
                    ..
                } => {
                    // The constructor call that defines a candidate is not a use.
                    let defines_candidate =
                        candidates.contains_key(&inst.value) && is_rust_map_constructor(callee);
                    if defines_candidate {
                        continue;
                    }

                    if let Some(map) = receiver.and_then(|r| tracked_root(r)) {
                        let map_var = candidates.get(&map).map(String::as_str).unwrap_or("");
                        match classify_map_use(callee, map_var) {
                            MapUse::Insert => {
                                // The inserted value is the second call argument.
                                let literals = &cfg[inst.cfg_node].call.arg_string_literals;
                                match literals.get(1).and_then(|slot| slot.clone()) {
                                    Some(lit) => {
                                        inserted.entry(map).or_default().insert(lit);
                                    }
                                    None => {
                                        invalid.insert(map);
                                    }
                                }
                            }
                            MapUse::StaticLookup => {
                                // The first literal of the node is the `unwrap_or` fallback;
                                // a lookup without a literal fallback is not recorded.
                                let literals = &cfg[inst.cfg_node].call.arg_string_literals;
                                if let Some(fallback) =
                                    literals.first().and_then(|slot| slot.clone())
                                {
                                    lookups.push((map, inst.value, fallback));
                                }
                            }
                            MapUse::ReadOnly => {}
                            MapUse::Escape => {
                                invalid.insert(map);
                            }
                        }
                    }

                    // Passing a map as an argument hands it to unknown code.
                    for group in args {
                        for &v in group {
                            invalid.extend(tracked_root(v));
                        }
                    }
                }
                // Single-operand assignments were handled as aliases in pass 2.
                SsaOp::Assign(uses) if uses.len() != 1 => {
                    for &u in uses {
                        invalid.extend(tracked_root(u));
                    }
                }
                _ => {}
            }
        }

        // Returning a map lets it leave the function.
        if let Terminator::Return(Some(v)) = &block.terminator {
            invalid.extend(tracked_root(*v));
        }
    }

    // Build the value domain of every lookup on a map that stayed valid.
    let mut result = StaticMapResult::default();
    let valid_lookups = lookups
        .into_iter()
        .filter(|(map, _, _)| !invalid.contains(map));
    for (map, lookup_val, fallback) in valid_lookups {
        let literals = match inserted.get(&map) {
            Some(set) if !set.is_empty() => set,
            // No literal was ever inserted: nothing useful to report.
            _ => continue,
        };
        let mut domain: Vec<String> = Vec::with_capacity(literals.len() + 1);
        domain.extend(literals.iter().cloned());
        domain.push(fallback);
        domain.sort();
        domain.dedup();
        result.finite_string_values.insert(lookup_val, domain);
    }
    result
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `classify_map_use` against `(callee, map_var, expected)` rows.
    fn check_classification(cases: &[(&str, &str, MapUse)]) {
        for (callee, map_var, expected) in cases {
            assert_eq!(
                &classify_map_use(callee, map_var),
                expected,
                "callee `{}` with map variable `{}`",
                callee,
                map_var
            );
        }
    }

    /// Only `new` on `HashMap`/`BTreeMap`, with or without a path prefix, is a constructor.
    #[test]
    fn rust_map_constructor_matches() {
        let accepted = [
            "HashMap::new",
            "std::collections::HashMap::new",
            "BTreeMap::new",
        ];
        for callee in accepted.iter() {
            assert!(
                is_rust_map_constructor(callee),
                "`{}` should be accepted",
                callee
            );
        }

        let rejected = ["HashMap::from", "HashMap::with_capacity", "Vec::new"];
        for callee in rejected.iter() {
            assert!(
                !is_rust_map_constructor(callee),
                "`{}` should be rejected",
                callee
            );
        }
    }

    #[test]
    fn classify_insert_call() {
        check_classification(&[("table.insert", "table", MapUse::Insert)]);
    }

    /// Known non-recording methods are `ReadOnly`; iteration-style methods escape.
    #[test]
    fn classify_read_only_call() {
        check_classification(&[
            ("table.contains_key", "table", MapUse::ReadOnly),
            ("table.len", "table", MapUse::ReadOnly),
            ("table.values", "table", MapUse::Escape),
            ("table.iter", "table", MapUse::Escape),
        ]);
    }

    #[test]
    fn classify_static_lookup_with_copied() {
        check_classification(&[(
            "table.get(key.as_str()).copied().unwrap_or",
            "table",
            MapUse::StaticLookup,
        )]);
    }

    #[test]
    fn classify_static_lookup_without_identity_chain() {
        check_classification(&[("table.get(k).unwrap_or", "table", MapUse::StaticLookup)]);
    }

    #[test]
    fn classify_static_lookup_mixed_identity_chain() {
        check_classification(&[(
            "t.get(k).as_deref().cloned().unwrap_or",
            "t",
            MapUse::StaticLookup,
        )]);
    }

    /// Anything other than a bare `.unwrap_or` at the end of the chain is not a static lookup.
    #[test]
    fn classify_rejects_unknown_terminator() {
        check_classification(&[
            ("t.get(k).copied().unwrap_or_else", "t", MapUse::Escape),
            ("t.get(k).unwrap", "t", MapUse::Escape),
        ]);
    }

    #[test]
    fn classify_rejects_other_receiver() {
        check_classification(&[("other.insert", "table", MapUse::Escape)]);
    }

    /// Asserts on the `Option` itself so that a `None` result can never be
    /// mistaken for a successful scan that leaves an empty remainder.
    #[test]
    fn scan_past_balanced_parens_basic() {
        assert_eq!(scan_past_balanced_parens("foo)"), Some(""));
        assert_eq!(scan_past_balanced_parens("foo).bar"), Some(".bar"));
        assert_eq!(scan_past_balanced_parens("foo(bar)baz).x"), Some(".x"));
        assert_eq!(scan_past_balanced_parens("no-close"), None);
    }

    /// `analyze` bails out before touching the body for non-Rust languages,
    /// so an empty body and an empty graph are sufficient here.
    #[test]
    fn non_rust_lang_returns_empty() {
        use petgraph::Graph;
        let body = SsaBody {
            blocks: vec![],
            entry: BlockId(0),
            value_defs: vec![],
            cfg_node_map: std::collections::HashMap::new(),
            exception_edges: vec![],
            field_interner: crate::ssa::ir::FieldInterner::default(),
            field_writes: std::collections::HashMap::new(),
            synthetic_externals: std::collections::HashSet::new(),
        };
        let cfg: Cfg = Graph::new();
        let const_values = HashMap::new();
        let result = analyze(&body, &cfg, Some(Lang::Java), &const_values);
        assert!(result.is_empty());
    }
}