use super::cfg::{BasicBlock, BlockId, CfgEdge, FunctionCfg};
use super::const_eval;
use super::dataflow::find_node_at_range;
use std::collections::{HashMap, HashSet, VecDeque};
use tree_sitter::Node;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InitState {
Uninitialized,
MaybeUninitialized,
Initialized,
MallocUninitialized,
MallocInitialized,
}
impl InitState {
pub fn join(self, other: InitState) -> InitState {
use InitState::*;
if self == other {
return self;
}
match (self, other) {
(MallocUninitialized, MallocInitialized) | (MallocInitialized, MallocUninitialized) => {
MallocInitialized
}
(Initialized, Uninitialized) | (Uninitialized, Initialized) => MaybeUninitialized,
(Initialized, MallocUninitialized)
| (MallocUninitialized, Initialized)
| (Initialized, MallocInitialized)
| (MallocInitialized, Initialized) => Initialized,
(MaybeUninitialized, _) | (_, MaybeUninitialized) => MaybeUninitialized,
_ => MaybeUninitialized,
}
}
pub fn is_unsafe(self) -> bool {
matches!(
self,
InitState::Uninitialized | InitState::MaybeUninitialized
)
}
pub fn is_content_unsafe(self) -> bool {
matches!(
self,
InitState::Uninitialized
| InitState::MaybeUninitialized
| InitState::MallocUninitialized
)
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct VarInfo {
pub state: InitState,
pub is_unsigned_char: bool,
pub is_array: bool,
pub is_static: bool,
pub is_char_type: bool,
pub allocation_count: Option<usize>,
}
impl VarInfo {
pub fn new(state: InitState) -> Self {
Self {
state,
is_unsigned_char: false,
is_array: false,
is_static: false,
is_char_type: false,
allocation_count: None,
}
}
}
pub type InitStateMap = HashMap<String, VarInfo>;
fn join_states(a: &InitStateMap, b: &InitStateMap) -> InitStateMap {
let mut result = a.clone();
for (var, info_b) in b {
let entry = result.entry(var.clone()).or_insert_with(|| info_b.clone());
if entry.state != info_b.state {
entry.state = entry.state.join(info_b.state);
}
}
result
}
fn join_with_goto(normal_path: &InitStateMap, goto_path: &InitStateMap) -> InitStateMap {
let mut result = HashMap::new();
let all_keys: HashSet<&String> = normal_path.keys().chain(goto_path.keys()).collect();
for var in all_keys {
match (normal_path.get(var), goto_path.get(var)) {
(Some(ia), Some(ib)) => {
let mut merged = ia.clone();
merged.state = ia.state.join(ib.state);
result.insert(var.clone(), merged);
}
(Some(ia), None) => {
let mut merged = ia.clone();
merged.state = ia.state.join(InitState::Uninitialized);
result.insert(var.clone(), merged);
}
(None, Some(ib)) => {
result.insert(var.clone(), ib.clone());
}
(None, None) => unreachable!(),
}
}
result
}
#[allow(dead_code)]
pub struct InitAnalysisResult {
pub block_entry_states: HashMap<BlockId, InitStateMap>,
pub block_exit_states: HashMap<BlockId, InitStateMap>,
pub tracked_vars: HashSet<String>,
}
const INITIALIZER_SUFFIXES: &[&str] = &[
"memset", "memcpy", "memmove", "strcpy", "strncpy", "sprintf", "snprintf", "bzero", "strcat",
"strncat",
];
pub fn match_initializing_function(func_name: &str) -> Option<&'static str> {
if let Some(&entry) = INITIALIZING_FUNCTIONS.iter().find(|&&e| e == func_name) {
return Some(entry);
}
INITIALIZER_SUFFIXES
.iter()
.find(|&&suffix| func_name.len() > suffix.len() && func_name.ends_with(suffix))
.copied()
}
pub const INITIALIZING_FUNCTIONS: &[&str] = &[
"memset",
"memcpy",
"memmove",
"strcpy",
"strncpy",
"sprintf",
"snprintf",
"fgets",
"fread",
"read",
"recv",
"scanf",
"fscanf",
"sscanf",
"gets",
"bzero",
"strcat",
"strncat",
"gettimeofday",
"getaddrinfo",
"stat",
"fstat",
"lstat",
"getrusage",
"getsockname",
"getpeername",
"clock_gettime",
"pthread_attr_init",
"pthread_mutex_init",
"pthread_cond_init",
"regcomp",
"regexec",
"sigaction",
"sigemptyset",
"sigfillset",
"mbrlen",
"mbrtowc",
"mbsrtowcs",
"wcrtomb",
"wcsrtombs",
];
pub fn get_output_arg_indices(func_name: &str) -> Vec<usize> {
match func_name {
"memset" | "memcpy" | "memmove" | "strcpy" | "strncpy" | "sprintf" | "snprintf"
| "strcat" | "strncat" | "bzero" => vec![0],
"fgets" | "gets" => vec![0],
"fread" | "read" | "recv" => vec![0],
"scanf" | "fscanf" | "sscanf" => vec![],
"gettimeofday" => vec![0],
"getaddrinfo" => vec![3],
"stat" | "fstat" | "lstat" => vec![1],
"getrusage" => vec![1],
"getsockname" | "getpeername" => vec![1, 2],
"clock_gettime" => vec![1],
"pthread_attr_init" | "pthread_mutex_init" | "pthread_cond_init" => vec![0],
"sigaction" => vec![2],
"sigemptyset" | "sigfillset" => vec![0],
"regcomp" => vec![0],
"mbrlen" | "mbrtowc" | "mbsrtowcs" | "wcrtomb" | "wcsrtombs" => vec![],
"regexec" => vec![],
_ => vec![0], }
}
pub fn is_non_initializing_function(func_name: &str) -> bool {
matches!(
func_name,
"mbrlen"
| "mbrtowc"
| "mbsrtowcs"
| "wcrtomb"
| "wcsrtombs"
| "regexec"
| "memcmp"
| "strcmp"
| "strncmp"
| "wmemcmp"
| "wcscmp"
| "wcsncmp"
| "printf"
| "fprintf"
| "vprintf"
| "vfprintf"
| "puts"
| "fputs"
| "putchar"
| "fputc"
| "strlen"
| "wcslen"
| "strchr"
| "strrchr"
| "strstr"
| "strpbrk"
| "crc32"
| "md5"
| "sha1"
| "sha256"
| "fwrite"
| "write"
| "send"
| "sendto"
)
}
pub const FOR_EACH_MACROS: &[&str] = &[
"dl_list_for_each",
"dl_list_for_each_safe",
"dl_list_for_each_reverse",
"for_each_link",
"TAILQ_FOREACH",
"TAILQ_FOREACH_SAFE",
"LIST_FOREACH",
"LIST_FOREACH_SAFE",
"SLIST_FOREACH",
"SLIST_FOREACH_SAFE",
"STAILQ_FOREACH",
"STAILQ_FOREACH_SAFE",
"RB_FOREACH",
"SIMPLEQ_FOREACH",
"CIRCLEQ_FOREACH",
];
#[derive(Default)]
pub struct InitAnalysisConfig {
pub conditionally_init_fns: HashMap<String, HashSet<usize>>,
pub realloc_wrapper_fns: HashSet<String>,
pub read_only_deref_fns: HashMap<String, HashSet<usize>>,
pub file_scope_constants: HashMap<String, i64>,
}
fn apply_transfer(
block: &BasicBlock,
entry_state: &InitStateMap,
body: &Node,
source: &str,
tracked_vars: &mut HashSet<String>,
config: &InitAnalysisConfig,
) -> InitStateMap {
let mut state = entry_state.clone();
for &(start, end) in &block.statements {
if let Some(stmt_node) = find_node_at_range(body, start, end) {
process_statement(&stmt_node, source, &mut state, tracked_vars, config);
}
}
state
}
fn process_statement(
node: &Node,
source: &str,
state: &mut InitStateMap,
tracked_vars: &mut HashSet<String>,
config: &InitAnalysisConfig,
) {
match node.kind() {
"declaration" => process_declaration(node, source, state, tracked_vars, config),
"expression_statement" => {
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if child.kind() != ";" {
process_expression(&child, source, state, tracked_vars, config);
}
}
}
}
"assignment_expression" | "update_expression" | "call_expression" | "comma_expression" => {
process_expression(node, source, state, tracked_vars, config);
}
"init_declarator" => {
}
"gnu_asm_expression" => {
process_gnu_asm(node, source, state);
}
_ => {
find_and_process_init_expressions(node, source, state, tracked_vars, config);
}
}
}
fn process_declaration(
node: &Node,
source: &str,
state: &mut InitStateMap,
tracked_vars: &mut HashSet<String>,
config: &InitAnalysisConfig,
) {
let type_text = extract_type_text(node, source);
let is_unsigned_char = type_text.contains("unsigned char")
|| type_text.contains("uint8_t")
|| type_text.contains("BYTE");
let is_char_type =
(type_text.contains("char") || type_text.contains("wchar_t")) && !is_unsigned_char;
let is_static = type_text.contains("static") || type_text.contains("_Thread_local");
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
match child.kind() {
"init_declarator" => {
let var_name = get_declarator_name(&child, source);
if var_name.is_empty() {
continue;
}
tracked_vars.insert(var_name.clone());
if let Some(value) = child.child_by_field_name("value") {
let init_state = classify_initializer(&value, source, config);
let is_array = is_array_declarator(&child);
let mut info = VarInfo::new(init_state);
info.is_unsigned_char = is_unsigned_char;
info.is_char_type = is_char_type;
info.is_array = is_array;
info.is_static = is_static;
if matches!(init_state, InitState::MallocUninitialized) {
info.allocation_count = extract_allocation_count(&value, source);
}
state.insert(var_name, info);
}
}
"identifier" => {
let var_name = get_text(node, &child, source);
if !var_name.is_empty() && !is_type_keyword(&var_name) {
tracked_vars.insert(var_name.clone());
let init_state = if is_static {
InitState::Uninitialized
} else {
InitState::Uninitialized
};
let is_array = false;
let mut info = VarInfo::new(init_state);
info.is_unsigned_char = is_unsigned_char;
info.is_char_type = is_char_type;
info.is_array = is_array;
info.is_static = is_static;
state.insert(var_name, info);
}
}
"pointer_declarator" | "array_declarator" => {
let var_name = get_declarator_name(&child, source);
if !var_name.is_empty() {
tracked_vars.insert(var_name.clone());
let init_state = InitState::Uninitialized;
let is_array = child.kind() == "array_declarator";
let mut info = VarInfo::new(init_state);
info.is_unsigned_char = is_unsigned_char;
info.is_char_type = is_char_type;
info.is_array = is_array;
info.is_static = is_static;
if is_array {
if let Some(size_node) = child.child_by_field_name("size") {
let empty: HashMap<String, i64> = HashMap::new();
if let Some(sz) =
const_eval::try_evaluate_expr(&size_node, source, &empty)
{
if sz > 0 {
info.allocation_count = Some(sz as usize);
}
}
}
}
state.insert(var_name, info);
}
}
_ => {}
}
}
}
}
fn classify_initializer(value: &Node, source: &str, config: &InitAnalysisConfig) -> InitState {
let text = value.utf8_text(source.as_bytes()).unwrap_or("");
if value.kind() == "call_expression" {
if let Some(func) = value.child_by_field_name("function") {
let func_name = func.utf8_text(source.as_bytes()).unwrap_or("");
match func_name {
"calloc" => return InitState::MallocInitialized,
"malloc" | "alloca" | "ALLOCA" => return InitState::MallocUninitialized,
"realloc" => return InitState::MallocUninitialized,
_ => {
if config.realloc_wrapper_fns.contains(func_name) {
return InitState::MallocUninitialized;
}
}
}
}
}
if value.kind() == "cast_expression" {
for i in 0..value.child_count() {
if let Some(child) = value.child(i) {
if child.kind() == "call_expression" {
return classify_initializer(&child, source, config);
}
}
}
}
if text.contains("malloc(") || text.contains("alloca(") || text.contains("ALLOCA(") {
return InitState::MallocUninitialized;
}
if text.contains("calloc(") || text.contains("zalloc(") {
return InitState::MallocInitialized;
}
if text.contains("realloc(") {
return InitState::MallocUninitialized;
}
InitState::Initialized
}
fn extract_allocation_count(value: &Node, source: &str) -> Option<usize> {
let inner = if value.kind() == "cast_expression" {
value.child_by_field_name("value")?
} else {
*value
};
if inner.kind() != "call_expression" {
let text = inner.utf8_text(source.as_bytes()).ok()?;
return extract_allocation_count_from_text(text);
}
let func = inner.child_by_field_name("function")?;
let func_name = func.utf8_text(source.as_bytes()).ok()?;
if !matches!(func_name, "malloc" | "alloca" | "ALLOCA" | "realloc") {
return None;
}
let args = inner.child_by_field_name("arguments")?;
let arg = {
let mut found = None;
for i in 0..args.child_count() {
if let Some(c) = args.child(i) {
if c.kind() != "(" && c.kind() != ")" && c.kind() != "," {
found = Some(c);
break;
}
}
}
found?
};
if arg.kind() == "binary_expression" {
let op = find_operator_text(&arg, source);
if op == "*" {
let left = arg.child_by_field_name("left")?;
let right = arg.child_by_field_name("right")?;
let macros: HashMap<String, i64> = HashMap::new();
if left.kind() == "sizeof_expression" {
let val = const_eval::try_evaluate_expr(&right, source, ¯os)?;
return if val > 0 { Some(val as usize) } else { None };
}
if right.kind() == "sizeof_expression" {
let val = const_eval::try_evaluate_expr(&left, source, ¯os)?;
return if val > 0 { Some(val as usize) } else { None };
}
}
}
None
}
fn extract_allocation_count_from_text(text: &str) -> Option<usize> {
let inner = if let Some(start) = text.find('(') {
&text[start + 1..text.rfind(')')?]
} else {
return None;
};
if let Some(sizeof_pos) = inner.find("sizeof") {
let before = inner[..sizeof_pos].trim().trim_end_matches('*').trim();
let macros: HashMap<String, i64> = HashMap::new();
if let Ok(val) = before.parse::<i64>() {
return if val > 0 { Some(val as usize) } else { None };
}
let _ = macros; }
None
}
fn find_operator_text<'a>(node: &Node, source: &'a str) -> &'a str {
for i in 0..node.child_count() {
if let Some(c) = node.child(i) {
let k = c.kind();
if matches!(
k,
"+" | "-" | "*" | "/" | "%" | "==" | "!=" | "<" | ">" | "<=" | ">="
) {
return c.utf8_text(source.as_bytes()).unwrap_or("");
}
}
}
""
}
fn is_partial_init_write(alloc_count: usize, node: &Node, source: &str) -> bool {
let mut current = node.parent();
while let Some(n) = current {
if n.kind() == "for_statement" {
if let Some(condition) = n.child_by_field_name("condition") {
if let Some(bound) = extract_for_upper_bound(&condition, source) {
return bound < alloc_count;
}
}
return false; }
if n.kind() == "function_definition" {
break;
}
current = n.parent();
}
false
}
fn extract_for_upper_bound(condition: &Node, source: &str) -> Option<usize> {
if condition.kind() != "binary_expression" {
return None;
}
let op = find_operator_text(condition, source);
let right = condition.child_by_field_name("right")?;
let macros = HashMap::new();
match op {
"<" => {
let val = const_eval::try_evaluate_expr(&right, source, ¯os)?;
if val > 0 {
Some(val as usize)
} else {
None
}
}
"<=" => {
let val = const_eval::try_evaluate_expr(&right, source, ¯os)?;
if val >= 0 {
Some((val + 1) as usize)
} else {
None
}
}
_ => None,
}
}
fn process_expression(
node: &Node,
source: &str,
state: &mut InitStateMap,
tracked_vars: &mut HashSet<String>,
config: &InitAnalysisConfig,
) {
match node.kind() {
"assignment_expression" => {
if let Some(right) = node.child_by_field_name("right") {
find_and_process_init_expressions(&right, source, state, tracked_vars, config);
}
if let Some(left) = node.child_by_field_name("left") {
let (var_name, has_subscript_in_chain) = if left.kind() == "identifier" {
(
left.utf8_text(source.as_bytes()).unwrap_or("").to_string(),
false,
)
} else if left.kind() == "pointer_expression" {
(extract_deref_target(&left, source), false)
} else if left.kind() == "subscript_expression" {
(extract_subscript_base(&left, source), true)
} else if left.kind() == "field_expression" {
let (name, has_sub) = extract_nested_base_ex(&left, source);
(name, has_sub)
} else {
(String::new(), false)
};
if !var_name.is_empty() {
let array_decay = if left.kind() == "identifier" {
if let Some(right) = node.child_by_field_name("right") {
if right.kind() == "identifier" {
let rhs_name = right.utf8_text(source.as_bytes()).unwrap_or("");
state.get(rhs_name).and_then(|rhs| {
if rhs.is_array && rhs.state.is_unsafe() && !rhs.is_char_type {
Some(rhs.allocation_count)
} else {
None
}
})
} else {
None
}
} else {
None
}
} else {
None
};
if let Some(info) = state.get_mut(&var_name) {
if left.kind() == "identifier" {
if let Some(array_alloc) = array_decay {
info.state = InitState::MallocUninitialized;
info.allocation_count = array_alloc;
} else if let Some(right) = node.child_by_field_name("right") {
info.state = classify_initializer(&right, source, config);
if matches!(info.state, InitState::MallocUninitialized) {
info.allocation_count =
extract_allocation_count(&right, source);
} else {
info.allocation_count = None;
}
} else {
info.state = InitState::Initialized;
info.allocation_count = None;
}
} else {
match info.state {
InitState::MallocUninitialized
if (left.kind() != "field_expression" || has_subscript_in_chain) => {
if let Some(alloc_count) = info.allocation_count {
if is_partial_init_write(alloc_count, node, source) {
} else {
info.state = InitState::MallocInitialized;
}
} else {
info.state = InitState::MallocInitialized;
}
}
InitState::Uninitialized => {
if left.kind() == "field_expression" && !has_subscript_in_chain
{
info.state = InitState::Initialized;
}
if left.kind() == "subscript_expression" {
info.state = InitState::Initialized;
}
}
_ => {}
}
}
}
}
}
}
"update_expression" => {
}
"call_expression" => {
process_call_expression(node, source, state, tracked_vars, config);
}
"comma_expression" => {
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if child.kind() != "," {
process_expression(&child, source, state, tracked_vars, config);
}
}
}
}
"cast_expression" => {
if let Some(value) = node.child_by_field_name("value") {
process_expression(&value, source, state, tracked_vars, config);
}
}
"parenthesized_expression" => {
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if child.kind() != "(" && child.kind() != ")" {
process_expression(&child, source, state, tracked_vars, config);
}
}
}
}
"gnu_asm_expression" => {
process_gnu_asm(node, source, state);
}
_ => {}
}
}
fn find_and_process_init_expressions(
node: &Node,
source: &str,
state: &mut InitStateMap,
tracked_vars: &mut HashSet<String>,
config: &InitAnalysisConfig,
) {
match node.kind() {
"assignment_expression" | "call_expression" | "comma_expression" | "update_expression" => {
process_expression(node, source, state, tracked_vars, config);
}
"gnu_asm_expression" => {
process_gnu_asm(node, source, state);
}
_ => {
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
find_and_process_init_expressions(&child, source, state, tracked_vars, config);
}
}
}
}
}
fn process_misparsed_asm_call(node: &Node, source: &str, state: &mut InitStateMap) {
let Some(args) = node.child_by_field_name("arguments") else {
return;
};
collect_asm_output_inits(&args, source, state);
}
fn collect_asm_output_inits(node: &Node, source: &str, state: &mut InitStateMap) {
for i in 0..node.child_count() {
let Some(child) = node.child(i) else { continue };
match child.kind() {
"call_expression" => {
let Some(func) = child.child_by_field_name("function") else {
continue;
};
if func.kind() != "string_literal" {
continue;
}
let constraint = func.utf8_text(source.as_bytes()).unwrap_or("");
if !constraint.contains('=') {
continue; }
let Some(inner_args) = child.child_by_field_name("arguments") else {
continue;
};
for j in 0..inner_args.child_count() {
let Some(ident) = inner_args.child(j) else {
continue;
};
if ident.kind() == "identifier" {
let name = ident.utf8_text(source.as_bytes()).unwrap_or("");
if !name.is_empty() {
if let Some(info) = state.get_mut(name) {
info.state = InitState::Initialized;
info.allocation_count = None;
}
}
}
}
}
"ERROR" => {
collect_asm_output_inits(&child, source, state);
}
_ => {}
}
}
}
fn process_gnu_asm(node: &Node, source: &str, state: &mut InitStateMap) {
let Some(output_list) = node.child_by_field_name("output_operands") else {
return;
};
for i in 0..output_list.child_count() {
let Some(child) = output_list.child(i) else {
continue;
};
if child.kind() != "gnu_asm_output_operand" {
continue;
}
if let Some(val) = child.child_by_field_name("value") {
let name = val.utf8_text(source.as_bytes()).unwrap_or("");
if !name.is_empty() {
if let Some(info) = state.get_mut(name) {
info.state = InitState::Initialized;
info.allocation_count = None;
}
}
}
}
}
fn process_call_expression(
node: &Node,
source: &str,
state: &mut InitStateMap,
_tracked_vars: &mut HashSet<String>,
config: &InitAnalysisConfig,
) {
let func = match node.child_by_field_name("function") {
Some(f) => f,
None => return,
};
let func_name = func.utf8_text(source.as_bytes()).unwrap_or("").to_string();
let func_name_lower = func_name.to_lowercase();
if func.kind() == "string_literal" && func_name.contains('=') {
if let Some(args) = node.child_by_field_name("arguments") {
for i in 0..args.child_count() {
if let Some(arg) = args.child(i) {
if arg.kind() == "identifier" {
let name = arg.utf8_text(source.as_bytes()).unwrap_or("");
if !name.is_empty() {
if let Some(info) = state.get_mut(name) {
info.state = InitState::Initialized;
info.allocation_count = None;
}
}
}
}
}
}
return;
}
if func_name_lower.starts_with("__asm") || func_name_lower == "asm" {
process_misparsed_asm_call(node, source, state);
return;
}
if func_name == "va_start" || func_name == "va_copy" {
if let Some(args) = node.child_by_field_name("arguments") {
for i in 0..args.child_count() {
if let Some(arg) = args.child(i) {
if arg.kind() == "identifier" {
let var_name = arg.utf8_text(source.as_bytes()).unwrap_or("").to_string();
if let Some(info) = state.get_mut(&var_name) {
info.state = InitState::Initialized;
}
break; }
}
}
}
return;
}
if FOR_EACH_MACROS.contains(&func_name.as_str()) {
if let Some(args) = node.child_by_field_name("arguments") {
for i in 0..args.child_count() {
if let Some(arg) = args.child(i) {
if arg.kind() == "identifier" {
let var_name = arg.utf8_text(source.as_bytes()).unwrap_or("").to_string();
if let Some(info) = state.get_mut(&var_name) {
info.state = InitState::Initialized;
}
break;
}
}
}
}
return;
}
if let Some(base_name) = match_initializing_function(&func_name) {
let output_indices = get_output_arg_indices(base_name);
if let Some(args) = node.child_by_field_name("arguments") {
let mut arg_idx = 0;
for i in 0..args.child_count() {
if let Some(arg) = args.child(i) {
if arg.kind() == "," || arg.kind() == "(" || arg.kind() == ")" {
continue;
}
if output_indices.contains(&arg_idx) {
let var_name = extract_var_from_arg(&arg, source);
if !var_name.is_empty() {
if let Some(info) = state.get_mut(&var_name) {
if base_name == "memset"
&& matches!(
info.state,
InitState::MallocUninitialized
| InitState::MallocInitialized
)
{
info.state = InitState::MallocInitialized;
} else {
info.state = InitState::Initialized;
}
}
}
}
arg_idx += 1;
}
}
}
return;
}
if is_non_initializing_function(&func_name) {
return;
}
let cond_param_indices = config.conditionally_init_fns.get(&func_name);
let read_only_indices = config.read_only_deref_fns.get(&func_name);
if let Some(args) = node.child_by_field_name("arguments") {
let mut arg_idx: usize = 0;
for i in 0..args.child_count() {
if let Some(arg) = args.child(i) {
if arg.kind() == "," || arg.kind() == "(" || arg.kind() == ")" {
continue;
}
let skip_this_arg = cond_param_indices
.is_some_and(|indices| indices.contains(&arg_idx))
|| read_only_indices.is_some_and(|indices| indices.contains(&arg_idx));
if !skip_this_arg && arg.kind() == "pointer_expression" {
let arg_text = arg.utf8_text(source.as_bytes()).unwrap_or("");
if arg_text.starts_with('&') {
let var_name = extract_var_from_arg(&arg, source);
if !var_name.is_empty() {
if let Some(info) = state.get_mut(&var_name) {
info.state = InitState::Initialized;
}
}
}
}
if !skip_this_arg && arg.kind() == "identifier" {
let var_name = arg.utf8_text(source.as_bytes()).unwrap_or("").to_string();
if let Some(info) = state.get(&var_name) {
if info.is_array {
let info = state.get_mut(&var_name).unwrap();
info.state = InitState::Initialized;
}
}
}
arg_idx += 1;
}
}
}
}
#[allow(dead_code)]
pub fn analyze_init_states(
cfg: &FunctionCfg,
func_node: &Node,
source: &str,
) -> InitAnalysisResult {
analyze_init_states_with_statics(
cfg,
func_node,
source,
&InitStateMap::new(),
&InitAnalysisConfig::default(),
)
}
pub fn analyze_init_states_with_statics(
cfg: &FunctionCfg,
func_node: &Node,
source: &str,
static_vars: &InitStateMap,
config: &InitAnalysisConfig,
) -> InitAnalysisResult {
let body = match func_node.child_by_field_name("body") {
Some(b) => b,
None => {
return InitAnalysisResult {
block_entry_states: HashMap::new(),
block_exit_states: HashMap::new(),
tracked_vars: HashSet::new(),
};
}
};
let mut tracked_vars = HashSet::new();
let mut initial_state = InitStateMap::new();
for (name, info) in static_vars {
initial_state.insert(name.clone(), info.clone());
tracked_vars.insert(name.clone());
}
collect_param_init_state(func_node, source, &mut initial_state, &mut tracked_vars);
let mut entry_states: HashMap<BlockId, InitStateMap> = HashMap::new();
let mut exit_states: HashMap<BlockId, InitStateMap> = HashMap::new();
for block in &cfg.blocks {
entry_states.insert(block.id, InitStateMap::new());
exit_states.insert(block.id, InitStateMap::new());
}
entry_states.insert(cfg.entry, initial_state.clone());
let entry_exit = apply_transfer(
&cfg.blocks[cfg.entry],
&initial_state,
&body,
source,
&mut tracked_vars,
config,
);
exit_states.insert(cfg.entry, entry_exit);
let mut visited: HashSet<BlockId> = HashSet::new();
visited.insert(cfg.entry);
let mut worklist: VecDeque<BlockId> = VecDeque::new();
for (succ, _) in cfg.successors(cfg.entry) {
worklist.push_back(succ);
}
let mut iterations = 0;
let max_iterations = 500 * cfg.blocks.len().max(1);
while let Some(block_id) = worklist.pop_front() {
iterations += 1;
if iterations > max_iterations {
break;
}
let preds = cfg.predecessors(block_id);
let mut new_entry = InitStateMap::new();
let mut first = true;
for (pred_id, edge_kind) in &preds {
if !visited.contains(pred_id) {
continue;
}
if matches!(edge_kind, CfgEdge::TrueBranch | CfgEdge::FalseBranch) {
let pred_block = &cfg.blocks[*pred_id];
if let Some(cond_val) = try_evaluate_block_condition(
pred_block,
&body,
source,
&config.file_scope_constants,
) {
let on_true_edge = matches!(edge_kind, CfgEdge::TrueBranch);
if cond_val != on_true_edge {
continue; }
}
}
let pred_exit = exit_states.get(pred_id).cloned().unwrap_or_default();
if first {
new_entry = pred_exit;
first = false;
} else if matches!(edge_kind, CfgEdge::Goto) {
new_entry = join_with_goto(&new_entry, &pred_exit);
} else {
new_entry = join_states(&new_entry, &pred_exit);
}
}
if first {
continue;
}
let block = &cfg.blocks[block_id];
let new_exit = apply_transfer(block, &new_entry, &body, source, &mut tracked_vars, config);
let old_exit = exit_states.get(&block_id);
let changed = match old_exit {
None => true,
Some(old) => *old != new_exit,
};
visited.insert(block_id);
if changed {
entry_states.insert(block_id, new_entry);
exit_states.insert(block_id, new_exit);
for (succ, _) in cfg.successors(block_id) {
if !worklist.contains(&succ) {
worklist.push_back(succ);
}
}
} else {
entry_states.insert(block_id, new_entry);
}
}
InitAnalysisResult {
block_entry_states: entry_states,
block_exit_states: exit_states,
tracked_vars,
}
}
#[allow(dead_code)]
pub fn get_var_info_at(
result: &InitAnalysisResult,
cfg: &FunctionCfg,
body: &Node,
source: &str,
var_name: &str,
byte_offset: usize,
) -> Option<VarInfo> {
get_var_info_at_with_config(
result,
cfg,
body,
source,
var_name,
byte_offset,
&InitAnalysisConfig::default(),
)
}
pub fn get_var_info_at_with_config(
result: &InitAnalysisResult,
cfg: &FunctionCfg,
body: &Node,
source: &str,
var_name: &str,
byte_offset: usize,
config: &InitAnalysisConfig,
) -> Option<VarInfo> {
let block = find_block_containing(cfg, byte_offset)?;
let entry = result.block_entry_states.get(&block.id)?;
let mut state = entry.clone();
let mut tracked = result.tracked_vars.clone();
for &(start, end) in &block.statements {
if start >= byte_offset {
break;
}
if end <= byte_offset {
if let Some(stmt_node) = find_node_at_range(body, start, end) {
process_statement(&stmt_node, source, &mut state, &mut tracked, config);
}
} else {
if let Some(stmt_node) = find_node_at_range(body, start, end) {
if stmt_node.kind() == "switch_statement" {
replay_within_switch_case(
&stmt_node,
source,
&mut state,
&mut tracked,
config,
byte_offset,
);
}
}
}
}
state.get(var_name).cloned()
}
fn find_block_containing(cfg: &FunctionCfg, byte_offset: usize) -> Option<&BasicBlock> {
for block in &cfg.blocks {
for &(start, end) in &block.statements {
if byte_offset >= start && byte_offset < end {
return Some(block);
}
}
}
cfg.blocks.iter().find(|block| {
block.byte_range.0 > 0
&& byte_offset >= block.byte_range.0
&& byte_offset < block.byte_range.1
})
}
fn collect_param_init_state(
func_node: &Node,
source: &str,
state: &mut InitStateMap,
tracked_vars: &mut HashSet<String>,
) {
let declarator = match func_node.child_by_field_name("declarator") {
Some(d) => d,
None => return,
};
let func_decl = find_function_declarator(&declarator);
let func_decl = match func_decl {
Some(d) => d,
None => return,
};
for i in 0..func_decl.child_count() {
if let Some(child) = func_decl.child(i) {
if child.kind() == "parameter_list" {
for j in 0..child.child_count() {
if let Some(param) = child.child(j) {
if param.kind() == "parameter_declaration" {
let param_name = get_declarator_name(¶m, source);
if !param_name.is_empty() {
let type_text = extract_type_text(¶m, source);
let is_unsigned_char = type_text.contains("unsigned char")
|| type_text.contains("uint8_t");
let is_array = param_has_array_declarator(¶m);
let mut info = VarInfo::new(InitState::Initialized);
info.is_unsigned_char = is_unsigned_char;
info.is_array = is_array;
state.insert(param_name.clone(), info);
tracked_vars.insert(param_name);
}
}
}
}
}
}
}
}
fn find_function_declarator<'a>(node: &Node<'a>) -> Option<Node<'a>> {
if node.kind() == "function_declarator" {
return Some(*node);
}
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if let Some(found) = find_function_declarator(&child) {
return Some(found);
}
}
}
None
}
fn extract_type_text(node: &Node, source: &str) -> String {
let mut parts = Vec::new();
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
match child.kind() {
"primitive_type"
| "sized_type_specifier"
| "type_identifier"
| "storage_class_specifier"
| "type_qualifier" => {
if let Ok(t) = child.utf8_text(source.as_bytes()) {
parts.push(t.to_string());
}
}
_ => {}
}
}
}
parts.join(" ")
}
fn get_declarator_name(node: &Node, source: &str) -> String {
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if child.kind() == "identifier" {
return child.utf8_text(source.as_bytes()).unwrap_or("").to_string();
}
}
}
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
match child.kind() {
"init_declarator"
| "pointer_declarator"
| "array_declarator"
| "function_declarator" => {
let name = get_declarator_name(&child, source);
if !name.is_empty() {
return name;
}
}
_ => {}
}
}
}
String::new()
}
fn is_array_declarator(node: &Node) -> bool {
if node.kind() == "array_declarator" {
return true;
}
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if is_array_declarator(&child) {
return true;
}
}
}
false
}
fn param_has_array_declarator(node: &Node) -> bool {
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if child.kind() == "array_declarator" {
return true;
}
if param_has_array_declarator(&child) {
return true;
}
}
}
false
}
fn is_type_keyword(s: &str) -> bool {
matches!(
s,
"int"
| "char"
| "short"
| "long"
| "float"
| "double"
| "void"
| "unsigned"
| "signed"
| "const"
| "volatile"
| "static"
| "extern"
| "register"
| "auto"
| "struct"
| "union"
| "enum"
| "typedef"
)
}
fn get_text(_parent: &Node, child: &Node, source: &str) -> String {
child.utf8_text(source.as_bytes()).unwrap_or("").to_string()
}
fn extract_var_from_arg(arg: &Node, source: &str) -> String {
if arg.kind() == "pointer_expression" {
let text = arg.utf8_text(source.as_bytes()).unwrap_or("");
if text.starts_with('&') {
if let Some(inner) = arg.child_by_field_name("argument") {
if inner.kind() == "identifier" {
return inner.utf8_text(source.as_bytes()).unwrap_or("").to_string();
}
}
}
} else if arg.kind() == "identifier" {
return arg.utf8_text(source.as_bytes()).unwrap_or("").to_string();
}
String::new()
}
fn extract_deref_target(node: &Node, source: &str) -> String {
if let Some(arg) = node.child_by_field_name("argument") {
if arg.kind() == "identifier" {
return arg.utf8_text(source.as_bytes()).unwrap_or("").to_string();
}
}
String::new()
}
fn extract_subscript_base(node: &Node, source: &str) -> String {
extract_nested_base(node, source)
}
fn extract_nested_base_ex(node: &Node, source: &str) -> (String, bool) {
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
match child.kind() {
"identifier" => {
let name = child.utf8_text(source.as_bytes()).unwrap_or("").to_string();
return (name, false);
}
"subscript_expression" => {
let (name, _) = extract_nested_base_ex(&child, source);
if !name.is_empty() {
return (name, true);
}
}
"field_expression" | "parenthesized_expression" => {
let result = extract_nested_base_ex(&child, source);
if !result.0.is_empty() {
return result;
}
}
_ => {}
}
}
}
(String::new(), false)
}
fn extract_nested_base(node: &Node, source: &str) -> String {
extract_nested_base_ex(node, source).0
}
pub fn collect_file_scope_statics(root: &Node, source: &str) -> InitStateMap {
let mut state = InitStateMap::new();
collect_file_scope_statics_recursive(root, source, &mut state);
state
}
fn collect_file_scope_statics_recursive(node: &Node, source: &str, state: &mut InitStateMap) {
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
match child.kind() {
"declaration" => {
let type_text = extract_type_text(&child, source);
let is_static =
type_text.contains("static") || type_text.contains("_Thread_local");
if is_static {
let mut tracked = HashSet::new();
process_declaration(
&child,
source,
state,
&mut tracked,
&InitAnalysisConfig::default(),
);
}
}
"preproc_ifdef" | "preproc_if" | "preproc_else" | "preproc_elif" => {
collect_file_scope_statics_recursive(&child, source, state);
}
_ => {}
}
}
}
}
pub fn collect_file_scope_constants(root: &Node, source: &str) -> HashMap<String, i64> {
let mut constants = HashMap::new();
collect_constants_recursive(root, source, &mut constants);
constants
}
fn collect_constants_recursive(node: &Node, source: &str, constants: &mut HashMap<String, i64>) {
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
match child.kind() {
"declaration" => {
let type_text = extract_type_text(&child, source);
let is_static_or_const =
type_text.contains("static") || type_text.contains("const");
if !is_static_or_const {
continue;
}
for j in 0..child.child_count() {
if let Some(decl) = child.child(j) {
if decl.kind() == "init_declarator" {
let name = get_declarator_name(&decl, source);
if name.is_empty() {
continue;
}
if let Some(value) = decl.child_by_field_name("value") {
let empty_macros: HashMap<String, i64> = HashMap::new();
if let Some(val) =
const_eval::try_evaluate_expr(&value, source, &empty_macros)
{
constants.insert(name, val);
}
}
}
}
}
}
"preproc_ifdef" | "preproc_if" | "preproc_else" | "preproc_elif" => {
collect_constants_recursive(&child, source, constants);
}
_ => {}
}
}
}
}
pub fn collect_constant_functions(root: &Node, source: &str) -> HashMap<String, i64> {
let mut result = HashMap::new();
collect_constant_functions_in(root, source, &mut result);
result
}
fn collect_constant_functions_in(node: &Node, source: &str, result: &mut HashMap<String, i64>) {
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
match child.kind() {
"function_definition" => {
if let Some((name, val)) = extract_constant_function(&child, source) {
result.insert(name, val);
}
}
"preproc_ifdef" | "preproc_if" | "preproc_else" | "preproc_elif"
| "preproc_ifndef" => {
collect_constant_functions_in(&child, source, result);
}
_ => {}
}
}
}
}
fn extract_constant_function(func_node: &Node, source: &str) -> Option<(String, i64)> {
let declarator = func_node.child_by_field_name("declarator")?;
let func_decl = find_function_declarator(&declarator)?;
let params = func_decl.child_by_field_name("parameters")?;
let named_count = params.named_child_count();
if named_count > 1 {
return None;
}
if named_count == 1 {
let p = params.named_child(0)?;
let text = p.utf8_text(source.as_bytes()).ok()?;
if text != "void" {
return None;
}
}
let name = get_declarator_name(&func_decl, source);
if name.is_empty() {
return None;
}
let body = func_node.child_by_field_name("body")?;
let mut return_val: Option<i64> = None;
let mut non_return_stmts = 0usize;
for i in 0..body.child_count() {
if let Some(stmt) = body.child(i) {
match stmt.kind() {
"{" | "}" => {}
"return_statement" => {
for j in 0..stmt.child_count() {
if let Some(child) = stmt.child(j) {
if child.kind() != "return" && child.kind() != ";" {
let empty: HashMap<String, i64> = HashMap::new();
return_val = const_eval::try_evaluate_expr(&child, source, &empty);
}
}
}
}
_ => {
non_return_stmts += 1;
}
}
}
}
if non_return_stmts > 0 {
return None;
}
return_val.map(|v| (name, v))
}
fn replay_within_switch_case(
switch_node: &Node,
source: &str,
state: &mut InitStateMap,
tracked: &mut HashSet<String>,
config: &InitAnalysisConfig,
byte_offset: usize,
) {
for i in 0..switch_node.child_count() {
if let Some(child) = switch_node.child(i) {
if child.kind() == "compound_statement" {
for j in 0..child.child_count() {
if let Some(case_node) = child.child(j) {
if case_node.kind() == "case_statement"
&& case_node.start_byte() < byte_offset
&& case_node.end_byte() > byte_offset
{
for k in 0..case_node.child_count() {
if let Some(stmt) = case_node.child(k) {
if stmt.end_byte() <= byte_offset {
process_statement(&stmt, source, state, tracked, config);
} else if stmt.start_byte() >= byte_offset {
break;
}
}
}
return;
}
}
}
break;
}
}
}
}
pub fn try_evaluate_block_condition(
block: &BasicBlock,
body: &Node,
source: &str,
constants: &HashMap<String, i64>,
) -> Option<bool> {
let (cond_start, cond_end) = block.condition_range?;
let cond_node = body.descendant_for_byte_range(cond_start, cond_end)?;
let val = const_eval::try_evaluate_expr(&cond_node, source, constants)?;
Some(val != 0) }
#[cfg(test)]
mod tests {
use super::*;
use crate::analyze::cfg::build_function_cfg;
fn analyze_code(code: &str) -> (FunctionCfg, InitAnalysisResult, String) {
let mut parser = tree_sitter::Parser::new();
parser.set_language(&tree_sitter_c::language()).unwrap();
let tree = parser.parse(code, None).unwrap();
let root = tree.root_node();
for i in 0..root.child_count() {
if let Some(child) = root.child(i) {
if child.kind() == "function_definition" {
let cfg = build_function_cfg(&child, code).unwrap();
let result = analyze_init_states(&cfg, &child, code);
return (cfg, result, code.to_string());
}
}
}
panic!("No function definition found");
}
#[test]
fn test_uninitialized_simple() {
let code = "void foo() { int x; x = x + 1; }";
let (cfg, result, source) = analyze_code(code);
let mut parser = tree_sitter::Parser::new();
parser.set_language(&tree_sitter_c::language()).unwrap();
let tree = parser.parse(&source, None).unwrap();
let root = tree.root_node();
let func = root.child(0).unwrap();
let body = func.child_by_field_name("body").unwrap();
let info = get_var_info_at(
&result,
&cfg,
&body,
&source,
"x",
code.find("x + 1").unwrap(),
);
assert!(info.is_some());
assert!(info.unwrap().state.is_unsafe());
}
#[test]
fn test_initialized_simple() {
let code = "void foo() { int x = 5; int y = x + 1; }";
let (cfg, result, source) = analyze_code(code);
let mut parser = tree_sitter::Parser::new();
parser.set_language(&tree_sitter_c::language()).unwrap();
let tree = parser.parse(&source, None).unwrap();
let func = tree.root_node().child(0).unwrap();
let body = func.child_by_field_name("body").unwrap();
let info = get_var_info_at(
&result,
&cfg,
&body,
&source,
"x",
source.find("x + 1").unwrap(),
);
assert!(info.is_some());
assert!(!info.unwrap().state.is_unsafe());
}
#[test]
fn test_conditional_init_both_branches() {
let code = r#"
void foo(int c) {
int x;
if (c) {
x = 1;
} else {
x = 2;
}
int y = x;
}
"#;
let (cfg, result, source) = analyze_code(code);
let mut parser = tree_sitter::Parser::new();
parser.set_language(&tree_sitter_c::language()).unwrap();
let tree = parser.parse(&source, None).unwrap();
let func = tree.root_node().child(0).unwrap();
let body = func.child_by_field_name("body").unwrap();
let use_pos = source.rfind("x;").unwrap();
let info = get_var_info_at(&result, &cfg, &body, &source, "x", use_pos);
assert!(info.is_some());
assert!(
!info.unwrap().state.is_unsafe(),
"x should be initialized after if/else"
);
}
#[test]
fn test_conditional_init_one_branch() {
let code = r#"
void foo(int c) {
int x;
if (c) {
x = 1;
}
int y = x;
}
"#;
let (cfg, result, source) = analyze_code(code);
let mut parser = tree_sitter::Parser::new();
parser.set_language(&tree_sitter_c::language()).unwrap();
let tree = parser.parse(&source, None).unwrap();
let func = tree.root_node().child(0).unwrap();
let body = func.child_by_field_name("body").unwrap();
let use_pos = source.rfind("x;").unwrap();
let info = get_var_info_at(&result, &cfg, &body, &source, "x", use_pos);
assert!(info.is_some());
assert!(
info.unwrap().state.is_unsafe(),
"x should be maybe-uninitialized after if without else"
);
}
#[test]
fn test_malloc_uninitialized() {
let code = r#"
void foo() {
int *p = malloc(sizeof(int));
*p = 42;
}
"#;
let (cfg, result, source) = analyze_code(code);
let mut parser = tree_sitter::Parser::new();
parser.set_language(&tree_sitter_c::language()).unwrap();
let tree = parser.parse(&source, None).unwrap();
let func = tree.root_node().child(0).unwrap();
let body = func.child_by_field_name("body").unwrap();
let deref_pos = source.find("*p = 42").unwrap();
let info = get_var_info_at(&result, &cfg, &body, &source, "p", deref_pos);
assert!(info.is_some());
let info = info.unwrap();
assert_eq!(info.state, InitState::MallocUninitialized);
assert!(!info.state.is_unsafe()); assert!(info.state.is_content_unsafe()); }
#[test]
fn test_parameter_initialized() {
let code = "void foo(int x) { int y = x; }";
let (cfg, result, source) = analyze_code(code);
let mut parser = tree_sitter::Parser::new();
parser.set_language(&tree_sitter_c::language()).unwrap();
let tree = parser.parse(&source, None).unwrap();
let func = tree.root_node().child(0).unwrap();
let body = func.child_by_field_name("body").unwrap();
let info = get_var_info_at(
&result,
&cfg,
&body,
&source,
"x",
source.find("x;").unwrap(),
);
assert!(info.is_some());
assert!(!info.unwrap().state.is_unsafe());
}
}