use crate::analyze::cfg;
use crate::analyze::const_eval;
use crate::analyze::context::ProjectContext;
use crate::analyze::function_summary::FunctionSummary;
use crate::manifest::{RuleCategory, Severity};
use crate::rules::{CertRule, RuleViolation};
use crate::utility::cert_c::ast_utils::{get_node_text, is_function_parameter};
use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
use tree_sitter::Node;
/// Names of functions whose calls are treated as taint sources.
///
/// `walk_for_taint` compares the trailing identifier of every callee it
/// visits against this list, first verbatim and then lower-cased. A match
/// makes `scope_has_taint_source` report the enclosing scope as tainted.
const TAINT_SOURCES: &[&str] = &[
"recv",
"recvfrom",
"recvmsg",
"WSARecv",
"WSARecvFrom",
"accept",
"read",
"fread",
"fgets",
"gets",
"getchar",
"getc",
"fgetc",
"scanf",
"fscanf",
"sscanf",
"vscanf",
"vfscanf",
"fgetws",
"getwchar",
"getwc",
"fgetwc",
"wscanf",
"fwscanf",
"swscanf",
"vwscanf",
"vfwscanf",
"_getws",
"_getws_s",
"getenv",
"secure_getenv",
"_wgetenv",
"_wgetenv_s",
"ReadFile",
"ReadConsole",
"ReadConsoleA",
"ReadConsoleW",
"RegQueryValueExA",
"RegQueryValueExW",
];
/// Rule state for ENV03-C ("Sanitize the environment when invoking external
/// programs").
///
/// Every field sits behind a `RefCell` because the `CertRule` methods that
/// fill them (`set_project_context`, `check`) only receive `&self`.
pub struct Env03C {
// Copy of `ProjectContext::macro_aliases`, stored by `set_project_context`.
project_aliases: RefCell<HashMap<String, String>>,
// Project aliases extended with the aliases collected from the tree passed
// to `check`; rebuilt on every `check` call and read by `resolve_name`.
current_aliases: RefCell<HashMap<String, String>>,
// Copy of `ProjectContext::function_summaries`, keyed by function name.
function_summaries: RefCell<HashMap<String, FunctionSummary>>,
// Inverted call graph (callee name -> set of caller names), derived from
// `ProjectContext::call_graph` in `set_project_context`.
callers: RefCell<HashMap<String, HashSet<String>>>,
// Copy of `ProjectContext::global_writers`.
global_writers: RefCell<HashMap<String, HashSet<String>>>,
// Result of `const_eval::collect_string_literal_macros` for the tree passed
// to the most recent `check` call.
file_string_macros: RefCell<HashMap<String, String>>,
}
impl Env03C {
    /// Builds a rule instance whose caches all start out empty.
    ///
    /// The caches are populated later by `set_project_context` and `check`.
    pub fn new() -> Self {
        Self {
            project_aliases: RefCell::default(),
            current_aliases: RefCell::default(),
            function_summaries: RefCell::default(),
            callers: RefCell::default(),
            global_writers: RefCell::default(),
            file_string_macros: RefCell::default(),
        }
    }
}
impl Default for Env03C {
fn default() -> Self {
Self::new()
}
}
impl CertRule for Env03C {
    /// Identifier attached to every violation this rule reports.
    fn rule_id(&self) -> &'static str {
        "ENV03-C"
    }

    /// One-line summary of the rule.
    fn description(&self) -> &'static str {
        "Sanitize the environment when invoking external programs"
    }

    /// Severity assigned to the rule.
    fn severity(&self) -> Severity {
        Severity::High
    }

    /// Category the rule is filed under.
    fn category(&self) -> RuleCategory {
        RuleCategory::Recommendation
    }

    /// CERT identifier of the rule (same string as `rule_id`).
    fn cert_id(&self) -> &'static str {
        "ENV03-C"
    }

    /// Copies the project-wide data this rule needs out of `context`.
    ///
    /// Macro aliases, function summaries and global writers are cloned as-is.
    /// The call graph is inverted so that each callee maps to the set of
    /// functions calling it.
    fn set_project_context(&self, context: &ProjectContext) {
        *self.project_aliases.borrow_mut() = context.macro_aliases.clone();
        *self.function_summaries.borrow_mut() = context.function_summaries.clone();
        *self.global_writers.borrow_mut() = context.global_writers.clone();

        let mut callers_by_callee: HashMap<String, HashSet<String>> = HashMap::new();
        for (caller_name, callee_names) in &context.call_graph {
            for callee_name in callee_names {
                let known_callers = callers_by_callee.entry(callee_name.clone()).or_default();
                known_callers.insert(caller_name.clone());
            }
        }
        *self.callers.borrow_mut() = callers_by_callee;
    }

    /// Runs the rule over the tree rooted at `node` and returns all violations.
    ///
    /// Before walking the tree, the alias table and the string-literal macro
    /// table are rebuilt for this tree. Aliases collected from the tree are
    /// applied on top of the project aliases, so they win on a name clash.
    fn check(&self, node: &Node, source: &str) -> Vec<RuleViolation> {
        let mut merged_aliases = self.project_aliases.borrow().clone();
        let local_aliases = const_eval::collect_macro_aliases(node, source);
        merged_aliases.extend(local_aliases);
        *self.current_aliases.borrow_mut() = merged_aliases;

        let string_macros = const_eval::collect_string_literal_macros(node, source);
        *self.file_string_macros.borrow_mut() = string_macros;

        let mut found = Vec::new();
        self.check_all_calls(node, source, &mut found);
        found
    }
}
impl Env03C {
    /// Resolves `name` through the alias table prepared by `check`.
    ///
    /// Returns the alias target when one exists, otherwise `name` unchanged.
    fn resolve_name(&self, name: &str) -> String {
        let aliases = self.current_aliases.borrow();
        match aliases.get(name) {
            Some(target) => target.clone(),
            None => name.to_string(),
        }
    }

    /// Walks the subtree under `node` and records a violation for every
    /// `system` / `popen` call (after alias resolution) that is neither
    /// accompanied by environment sanitization in its scope nor proven to use
    /// a trusted command argument.
    fn check_all_calls(&self, node: &Node, source: &str, violations: &mut Vec<RuleViolation>) {
        if node.kind() == "call_expression" {
            if let Some(function) = node.child_by_field_name("function") {
                let func_name = get_node_text(&function, source);
                let resolved = self.resolve_name(func_name);
                if resolved == "system" || resolved == "popen" {
                    // The scope is the enclosing function definition, or the
                    // outermost ancestor when the call is not inside one.
                    let scope = self.find_containing_function_or_root(node);
                    let scope_node = scope.unwrap_or(*node);

                    let mut has_sanitization = false;
                    self.check_for_sanitization(&scope_node, source, &mut has_sanitization);

                    if !has_sanitization && self.command_arg_is_untrusted(node, &scope_node, source)
                    {
                        let start_point = node.start_position();
                        let call_text = get_node_text(node, source);
                        violations.push(RuleViolation {
                            rule_id: self.rule_id().to_string(),
                            severity: Severity::High,
                            message: format!(
                                "External program invocation '{}' without environment sanitization. \
                                 The environment should be sanitized before invoking external programs \
                                 to prevent environment variable manipulation attacks.",
                                call_text
                            ),
                            file_path: String::new(),
                            // tree-sitter positions are zero-based; reports are one-based.
                            line: start_point.row + 1,
                            column: start_point.column + 1,
                            suggestion: Some(
                                "Call clearenv() to clear the environment, then use setenv() to set \
                                 PATH and IFS to known safe values before invoking external programs."
                                    .to_string(),
                            ),
                            ..Default::default()
                        });
                    }
                }
            }
        }
        for i in 0..node.child_count() {
            if let Some(child) = node.child(i) {
                self.check_all_calls(&child, source, violations);
            }
        }
    }

    /// Returns the nearest enclosing `function_definition` of `node`, or the
    /// outermost ancestor (possibly `node` itself) when there is none.
    ///
    /// The result is always `Some`.
    fn find_containing_function_or_root<'a>(&self, node: &Node<'a>) -> Option<Node<'a>> {
        let mut current = *node;
        let mut root = *node;
        while let Some(parent) = current.parent() {
            if parent.kind() == "function_definition" {
                return Some(parent);
            }
            root = parent;
            current = parent;
        }
        Some(root)
    }

    /// Sets `*has_sanitization` when the subtree under `node` contains a call
    /// that counts as environment sanitization:
    ///
    /// * any call to `clearenv`;
    /// * `setenv` whose first argument is a string literal containing `PATH`
    ///   or `IFS`;
    /// * `putenv` whose first argument is a string literal containing `PATH=`
    ///   or `IFS=`.
    ///
    /// The first argument is taken as the first *named* child of the argument
    /// list. Indexing raw children would land on the opening `(` token, which
    /// is never a string literal, so the `setenv`/`putenv` cases would never
    /// match.
    fn check_for_sanitization(&self, node: &Node, source: &str, has_sanitization: &mut bool) {
        if *has_sanitization {
            return;
        }
        if node.kind() == "call_expression" {
            if let Some(function) = node.child_by_field_name("function") {
                let func_name = get_node_text(&function, source);
                // Substrings that mark the first argument as a PATH/IFS reset.
                let markers: Option<[&str; 2]> = match func_name {
                    "clearenv" => {
                        *has_sanitization = true;
                        return;
                    }
                    "setenv" => Some(["PATH", "IFS"]),
                    "putenv" => Some(["PATH=", "IFS="]),
                    _ => None,
                };
                if let Some(markers) = markers {
                    let first_arg = node
                        .child_by_field_name("arguments")
                        .and_then(|args| first_non_paren_arg(&args));
                    if let Some(arg) = first_arg {
                        if arg.kind() == "string_literal" {
                            let arg_text = get_node_text(&arg, source);
                            if markers.iter().any(|marker| arg_text.contains(*marker)) {
                                *has_sanitization = true;
                                return;
                            }
                        }
                    }
                }
            }
        }
        for i in 0..node.child_count() {
            if let Some(child) = node.child(i) {
                self.check_for_sanitization(&child, source, has_sanitization);
            }
        }
    }

    /// Decides whether the command argument of `call` must be treated as
    /// untrusted.
    ///
    /// The answer is `true` (untrusted) unless the first argument is a plain
    /// identifier inside a function definition and one of the checks below
    /// proves it clean.
    fn command_arg_is_untrusted(&self, call: &Node, scope: &Node, source: &str) -> bool {
        let Some(args) = call.child_by_field_name("arguments") else {
            return true;
        };
        let Some(first_arg) = first_non_paren_arg(&args) else {
            return true;
        };
        let arg = strip_casts_and_parens(first_arg);
        if arg.kind() != "identifier" {
            return true;
        }
        if scope.kind() != "function_definition" {
            return true;
        }
        let var_name = get_node_text(&arg, source);
        // A parameter is only as trustworthy as every caller of the function.
        if is_function_parameter(scope, var_name, source) {
            return !self.callers_are_all_clean(scope, source);
        }
        if scope_has_taint_source(scope, source) {
            return true;
        }
        // Same reasoning for a local that is only ever assigned from parameters.
        if is_command_var_parameter_derived(scope, var_name, source) {
            return !self.callers_are_all_clean(scope, source);
        }
        let summaries = self.function_summaries.borrow();
        let global_writers = self.global_writers.borrow();
        let string_macros = self.file_string_macros.borrow();
        !is_command_var_locally_safe(
            scope,
            var_name,
            source,
            &summaries,
            &global_writers,
            &string_macros,
        )
    }

    /// Returns `true` only when the function `scope` has at least one known
    /// caller and every caller has a summary with neither
    /// `has_env03_taint_source` nor `has_relative_command_write` set.
    ///
    /// An unnamed scope, an unknown or empty caller set, or a caller without
    /// a summary all yield `false`.
    fn callers_are_all_clean(&self, scope: &Node, source: &str) -> bool {
        let Some(scope_name) = cfg::get_function_name(scope, source) else {
            return false;
        };
        let callers = self.callers.borrow();
        let Some(caller_set) = callers.get(scope_name) else {
            return false;
        };
        if caller_set.is_empty() {
            return false;
        }
        let summaries = self.function_summaries.borrow();
        caller_set.iter().all(|caller| {
            summaries
                .get(caller)
                .is_some_and(|s| !s.has_env03_taint_source && !s.has_relative_command_write)
        })
    }
}
fn first_non_paren_arg<'a>(args: &Node<'a>) -> Option<Node<'a>> {
for i in 0..args.child_count() {
if let Some(child) = args.child(i) {
if child.is_named() {
return Some(child);
}
}
}
None
}
/// Peels any number of enclosing parentheses and casts off `node`.
///
/// Stops at the first node that is neither a `parenthesized_expression` nor
/// a `cast_expression`, or whose inner expression cannot be found.
fn strip_casts_and_parens<'a>(mut node: Node<'a>) -> Node<'a> {
    loop {
        let inner = match node.kind() {
            "parenthesized_expression" => node.named_child(0),
            "cast_expression" => node.child_by_field_name("value"),
            _ => None,
        };
        match inner {
            Some(next) => node = next,
            None => return node,
        }
    }
}
/// Reports whether any call under `scope` targets a function listed in
/// `TAINT_SOURCES`.
fn scope_has_taint_source(scope: &Node, source: &str) -> bool {
    let mut hit = false;
    walk_for_taint(scope, source, &mut hit);
    hit
}
/// Depth-first search for a call to a taint source; sets `*found` on the
/// first hit and stops descending.
///
/// A callee matches when its trailing identifier is in `TAINT_SOURCES`
/// either verbatim or after lower-casing.
fn walk_for_taint(node: &Node, source: &str, found: &mut bool) {
    if *found {
        return;
    }

    let callee = if node.kind() == "call_expression" {
        node.child_by_field_name("function")
    } else {
        None
    };
    if let Some(callee) = callee {
        let ident = trailing_identifier(get_node_text(&callee, source));
        let matches_verbatim = TAINT_SOURCES.contains(&ident);
        // Only lower-case (and allocate) when the verbatim lookup missed.
        let matches_lowered = || {
            let lowered = ident.to_lowercase();
            lowered != ident && TAINT_SOURCES.contains(&lowered.as_str())
        };
        if matches_verbatim || matches_lowered() {
            *found = true;
            return;
        }
    }

    for idx in 0..node.child_count() {
        if *found {
            break;
        }
        if let Some(child) = node.child(idx) {
            walk_for_taint(&child, source, found);
        }
    }
}
/// Returns the part of `name` after the last character that is neither
/// alphanumeric nor `_`.
///
/// When `name` contains no such character it is returned whole. When it ends
/// with one, the result is the empty string.
fn trailing_identifier(name: &str) -> &str {
    let is_separator = |c: char| !(c.is_alphanumeric() || c == '_');
    let last_separator = name.char_indices().rev().find(|&(_, c)| is_separator(c));
    match last_separator {
        Some((pos, sep)) => &name[pos + sep.len_utf8()..],
        None => name,
    }
}
/// Reports whether `var_name` is a non-parameter variable whose every
/// recorded write is an expression built from the function's parameters
/// (directly or through other such variables).
///
/// Returns `false` when the function has no body or no parameters.
fn is_command_var_parameter_derived(scope: &Node, var_name: &str, source: &str) -> bool {
    let Some(body) = scope.child_by_field_name("body") else {
        return false;
    };
    let params: HashSet<String> = collect_param_names(scope, source).into_iter().collect();
    if params.is_empty() {
        return false;
    }

    let mut writes: HashMap<String, Vec<Node>> = HashMap::new();
    collect_variable_writes(&body, source, &mut writes);

    // Fixed-point iteration: keep sweeping until a pass adds nothing new.
    let mut derived = params.clone();
    let mut grew = true;
    while grew {
        grew = false;
        for (name, rhs_list) in &writes {
            if derived.contains(name) || rhs_list.is_empty() {
                continue;
            }
            let all_from_params = rhs_list
                .iter()
                .all(|rhs| is_param_derived_expr(rhs, &derived, source));
            if all_from_params {
                derived.insert(name.clone());
                grew = true;
            }
        }
    }

    derived.contains(var_name) && !params.contains(var_name)
}
fn collect_param_names(scope: &Node, source: &str) -> Vec<String> {
let mut names = Vec::new();
let Some(decl) = scope.child_by_field_name("declarator") else {
return names;
};
let Some(params) = find_parameter_list(&decl) else {
return names;
};
for i in 0..params.child_count() {
let Some(child) = params.child(i) else {
continue;
};
if child.kind() != "parameter_declaration" {
continue;
}
let Some(pd) = child.child_by_field_name("declarator") else {
continue;
};
if let Some(name) = extract_declarator_name(&pd, source) {
names.push(name);
}
}
names
}
fn find_parameter_list<'a>(node: &Node<'a>) -> Option<Node<'a>> {
if node.kind() == "function_declarator" {
for i in 0..node.child_count() {
if let Some(c) = node.child(i) {
if c.kind() == "parameter_list" {
return Some(c);
}
}
}
}
for i in 0..node.child_count() {
if let Some(c) = node.child(i) {
if let Some(found) = find_parameter_list(&c) {
return Some(found);
}
}
}
None
}
fn collect_variable_writes<'a>(
node: &Node<'a>,
source: &str,
writes: &mut HashMap<String, Vec<Node<'a>>>,
) {
match node.kind() {
"init_declarator" => {
if let Some(decl) = node.child_by_field_name("declarator") {
if let Some(name) = extract_declarator_name(&decl, source) {
if let Some(value) = node.child_by_field_name("value") {
writes.entry(name).or_default().push(value);
}
}
}
}
"assignment_expression" => {
if let Some(lhs) = node.child_by_field_name("left") {
let lhs = strip_casts_and_parens(lhs);
if lhs.kind() == "identifier" {
let op_is_plain = node_operator(node, source)
.map(|op| op == "=")
.unwrap_or(true);
if op_is_plain {
if let Some(rhs) = node.child_by_field_name("right") {
let name = get_node_text(&lhs, source).to_string();
writes.entry(name).or_default().push(rhs);
}
}
}
}
}
_ => {}
}
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
collect_variable_writes(&child, source, writes);
}
}
}
/// Reports whether `expr` is an identifier in `derived`, or a pointer,
/// unary, subscript or field expression whose `argument` operand is itself
/// derived. Casts and parentheses around `expr` are ignored.
fn is_param_derived_expr(expr: &Node, derived: &HashSet<String>, source: &str) -> bool {
    let stripped = strip_casts_and_parens(*expr);
    match stripped.kind() {
        "identifier" => derived.contains(get_node_text(&stripped, source)),
        "pointer_expression" | "unary_expression" | "subscript_expression"
        | "field_expression" => match stripped.child_by_field_name("argument") {
            Some(operand) => is_param_derived_expr(&operand, derived, source),
            None => false,
        },
        _ => false,
    }
}
/// Reports whether every write to `var_name` inside the body of `scope`
/// passes `check_writes`, using the trusted names computed by
/// `collect_local_literal_buffers`.
///
/// A scope without a body is reported as not safe.
fn is_command_var_locally_safe(
    scope: &Node,
    var_name: &str,
    source: &str,
    summaries: &HashMap<String, FunctionSummary>,
    global_writers: &HashMap<String, HashSet<String>>,
    string_macros: &HashMap<String, String>,
) -> bool {
    let Some(body) = scope.child_by_field_name("body") else {
        return false;
    };

    let trusted_names = collect_local_literal_buffers(&body, source, summaries, global_writers);

    let mut verdict = true;
    check_writes(
        &body,
        var_name,
        &trusted_names,
        source,
        summaries,
        string_macros,
        &mut verdict,
    );
    verdict
}
/// Builds the set of names regarded as safe sources inside `body`.
///
/// The set is seeded with every global whose writers all have a summary
/// with neither `has_env03_taint_source` nor `returns_tainted` set. The
/// array buffers found by `walk_buffer_decls` are added next. Finally
/// `walk_pointer_aliases` is repeated until the set stops growing.
fn collect_local_literal_buffers(
    body: &Node,
    source: &str,
    summaries: &HashMap<String, FunctionSummary>,
    global_writers: &HashMap<String, HashSet<String>>,
) -> HashSet<String> {
    let writer_is_clean = |writer: &String| {
        summaries
            .get(writer)
            .is_some_and(|summary| !summary.has_env03_taint_source && !summary.returns_tainted)
    };

    let mut trusted: HashSet<String> = global_writers
        .iter()
        .filter(|(_, writers)| writers.iter().all(|writer| writer_is_clean(writer)))
        .map(|(global_name, _)| global_name.clone())
        .collect();

    walk_buffer_decls(body, source, &mut trusted);

    // Alias discovery can enable further aliases, so iterate to a fixed point.
    loop {
        let size_before = trusted.len();
        walk_pointer_aliases(body, source, summaries, &mut trusted);
        if trusted.len() == size_before {
            break;
        }
    }
    trusted
}
/// Adds to `out` the name of every array declared under `node` whose
/// initializer, if present, is accepted by `initializer_is_safe_for_buffer`.
fn walk_buffer_decls(node: &Node, source: &str, out: &mut HashSet<String>) {
    if node.kind() == "declaration" {
        for idx in 0..node.child_count() {
            let Some(child) = node.child(idx) else { continue };

            // Pair each array declarator with its optional initializer.
            let array_decl = match child.kind() {
                "array_declarator" => Some((child, None)),
                "init_declarator" => child
                    .child_by_field_name("declarator")
                    .filter(|declarator| declarator.kind() == "array_declarator")
                    .map(|declarator| (declarator, child.child_by_field_name("value"))),
                _ => None,
            };
            let Some((declarator, initializer)) = array_decl else {
                continue;
            };

            if initializer_is_safe_for_buffer(&initializer) {
                if let Some(name) = extract_declarator_name(&declarator, source) {
                    out.insert(name);
                }
            }
        }
    }

    for idx in 0..node.child_count() {
        if let Some(child) = node.child(idx) {
            walk_buffer_decls(&child, source, out);
        }
    }
}
fn walk_pointer_aliases(
node: &Node,
source: &str,
summaries: &HashMap<String, FunctionSummary>,
out: &mut HashSet<String>,
) {
match node.kind() {
"declaration" => {
for i in 0..node.child_count() {
let Some(child) = node.child(i) else { continue };
if child.kind() != "init_declarator" {
continue;
}
let Some(decl) = child.child_by_field_name("declarator") else {
continue;
};
if decl.kind() != "pointer_declarator" {
continue;
}
let Some(value) = child.child_by_field_name("value") else {
continue;
};
if rhs_is_safe(Some(&value), out, source, summaries) {
if let Some(name) = extract_declarator_name(&decl, source) {
out.insert(name);
}
}
}
}
"assignment_expression" => {
if let Some(lhs) = node.child_by_field_name("left") {
let lhs = strip_casts_and_parens(lhs);
let op_is_plain = node_operator(node, source)
.map(|op| op == "=")
.unwrap_or(true);
if lhs.kind() == "identifier" && op_is_plain {
let rhs = node.child_by_field_name("right");
if rhs_is_safe(rhs.as_ref(), out, source, summaries) {
out.insert(get_node_text(&lhs, source).to_string());
}
} else if lhs.kind() == "field_expression" && op_is_plain {
if let Some(base) = field_expr_dot_base(&lhs, source) {
let rhs = node.child_by_field_name("right");
if rhs_is_safe(rhs.as_ref(), out, source, summaries) {
out.insert(base);
}
}
}
}
}
_ => {}
}
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
walk_pointer_aliases(&child, source, summaries, out);
}
}
}
/// Accepts a missing initializer, or one whose node kind is a string
/// literal, a concatenated string, an initializer list or an identifier.
fn initializer_is_safe_for_buffer(init: &Option<Node>) -> bool {
    const ACCEPTED_KINDS: [&str; 4] = [
        "string_literal",
        "concatenated_string",
        "initializer_list",
        "identifier",
    ];
    match init {
        None => true,
        Some(value) => ACCEPTED_KINDS.contains(&value.kind()),
    }
}
fn extract_declarator_name(decl: &Node, source: &str) -> Option<String> {
let mut current = *decl;
loop {
match current.kind() {
"identifier" | "field_identifier" | "type_identifier" => {
return Some(get_node_text(¤t, source).to_string());
}
_ => {
if let Some(inner) = current.child_by_field_name("declarator") {
current = inner;
continue;
}
let mut next = None;
for i in 0..current.child_count() {
if let Some(c) = current.child(i) {
if c.is_named() && c.kind() != "number_literal" {
next = Some(c);
break;
}
}
}
match next {
Some(n) => current = n,
None => return None,
}
}
}
}
}
fn check_writes(
node: &Node,
var: &str,
safe_sources: &HashSet<String>,
source: &str,
summaries: &HashMap<String, FunctionSummary>,
string_macros: &HashMap<String, String>,
all_safe: &mut bool,
) {
if !*all_safe {
return;
}
match node.kind() {
"init_declarator" => {
if let Some(decl) = node.child_by_field_name("declarator") {
if declarator_names(&decl, source).iter().any(|n| n == var) {
let value = node.child_by_field_name("value");
if !rhs_is_safe(value.as_ref(), safe_sources, source, summaries) {
*all_safe = false;
return;
}
}
}
}
"assignment_expression" => {
let Some(lhs) = node.child_by_field_name("left") else {
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
check_writes(
&child,
var,
safe_sources,
source,
summaries,
string_macros,
all_safe,
);
if !*all_safe {
return;
}
}
}
return;
};
let lhs = strip_casts_and_parens(lhs);
if lhs.kind() == "identifier" && get_node_text(&lhs, source) == var {
let op_matches = node_operator(node, source)
.map(|op| op == "=")
.unwrap_or(true);
if op_matches {
let rhs = node.child_by_field_name("right");
if !rhs_is_safe(rhs.as_ref(), safe_sources, source, summaries) {
*all_safe = false;
return;
}
} else {
*all_safe = false;
return;
}
}
}
"call_expression" => {
if let Some(function) = node.child_by_field_name("function") {
let name = get_node_text(&function, source);
let ident = trailing_identifier(name);
if matches!(
ident,
"strcat" | "strcpy" | "strncat" | "strncpy" | "wcscat" | "wcscpy"
) {
if let Some(args) = node.child_by_field_name("arguments") {
let named: Vec<_> = (0..args.child_count())
.filter_map(|i| args.child(i))
.filter(|c| c.is_named())
.collect();
if let Some(first) = named.first() {
let first_stripped = strip_casts_and_parens(*first);
if first_stripped.kind() == "identifier"
&& get_node_text(&first_stripped, source) == var
{
let second = named.get(1);
let second_safe = match second {
Some(n) => {
let s = strip_casts_and_parens(*n);
match s.kind() {
"string_literal" | "concatenated_string" => true,
"identifier" => {
let nm = get_node_text(&s, source);
safe_sources.contains(nm)
|| const_eval::is_safe_command_macro(
string_macros,
nm,
)
}
_ => false,
}
}
None => false,
};
if !second_safe {
*all_safe = false;
return;
}
}
}
}
}
}
}
_ => {}
}
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
check_writes(
&child,
var,
safe_sources,
source,
summaries,
string_macros,
all_safe,
);
if !*all_safe {
return;
}
}
}
}
fn rhs_is_safe(
rhs: Option<&Node>,
safe_sources: &HashSet<String>,
source: &str,
summaries: &HashMap<String, FunctionSummary>,
) -> bool {
let Some(rhs) = rhs else {
return true;
};
let rhs = strip_casts_and_parens(*rhs);
match rhs.kind() {
"string_literal" | "concatenated_string" => true,
"null" | "number_literal" => true,
"identifier" => safe_sources.contains(get_node_text(&rhs, source)),
"pointer_expression" | "unary_expression" => {
if let Some(arg) = rhs.child_by_field_name("argument") {
let a = strip_casts_and_parens(arg);
if a.kind() == "identifier" {
return safe_sources.contains(get_node_text(&a, source));
}
}
false
}
"call_expression" => call_is_clean(&rhs, source, summaries),
"field_expression" => field_expr_dot_base(&rhs, source)
.map(|base| safe_sources.contains(&base as &str))
.unwrap_or(false),
_ => false,
}
}
/// Reports whether the callee of `call` has a summary with none of
/// `has_env03_taint_source`, `returns_tainted` or
/// `has_relative_command_write` set.
///
/// Calls without a `function` child, or to functions without a summary,
/// are not clean.
fn call_is_clean(call: &Node, source: &str, summaries: &HashMap<String, FunctionSummary>) -> bool {
    call.child_by_field_name("function")
        .and_then(|callee| summaries.get(trailing_identifier(get_node_text(&callee, source))))
        .is_some_and(|summary| {
            !(summary.has_env03_taint_source
                || summary.returns_tainted
                || summary.has_relative_command_write)
        })
}
fn field_expr_dot_base(node: &Node, source: &str) -> Option<String> {
let mut is_dot = false;
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if !child.is_named() {
match get_node_text(&child, source) {
"." => {
is_dot = true;
break;
}
"->" => break,
_ => {}
}
}
}
}
if !is_dot {
return None;
}
node.child_by_field_name("argument").and_then(|arg| {
let base = strip_casts_and_parens(arg);
if base.kind() == "identifier" {
Some(get_node_text(&base, source).to_string())
} else {
None
}
})
}
fn node_operator<'a>(node: &'a Node<'a>, source: &'a str) -> Option<&'a str> {
if let Some(op) = node.child_by_field_name("operator") {
return Some(get_node_text(&op, source));
}
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if !child.is_named() {
let text = get_node_text(&child, source);
if text.contains('=') {
return Some(text);
}
}
}
}
None
}
/// Returns the name declared by `decl` as a zero- or one-element list.
fn declarator_names(decl: &Node, source: &str) -> Vec<String> {
    extract_declarator_name(decl, source).into_iter().collect()
}