use super::super::{CertRule, RuleViolation};
use crate::analyze::cfg;
use crate::analyze::const_eval;
use crate::analyze::context::ProjectContext;
use crate::analyze::function_summary::FunctionSummary;
use crate::manifest::{RuleCategory, Severity};
use crate::utility::cert_c::ast_utils::get_node_text;
use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
use tree_sitter::Node;
/// Checker for CERT C rule ENV33-C ("Do not call system()").
///
/// All state lives in `RefCell`s because the `CertRule` methods that populate
/// it (`set_project_context`, `check`) only receive `&self`.
pub struct Env33C {
/// Macro aliases copied from the project context in `set_project_context`.
project_aliases: RefCell<HashMap<String, String>>,
/// Aliases in effect for the current `check` call: the project aliases
/// extended with the aliases collected from the node being checked.
current_aliases: RefCell<HashMap<String, String>>,
/// Per-function summaries copied from the project context, keyed by name.
function_summaries: RefCell<HashMap<String, FunctionSummary>>,
/// Reverse call graph: maps a callee name to the set of its direct callers.
callers: RefCell<HashMap<String, HashSet<String>>>,
}
impl Env33C {
    /// Creates a rule instance whose alias, summary and caller tables are all
    /// empty.
    ///
    /// The tables are filled in later by `set_project_context` and `check`.
    pub fn new() -> Self {
        Self {
            project_aliases: RefCell::default(),
            current_aliases: RefCell::default(),
            function_summaries: RefCell::default(),
            callers: RefCell::default(),
        }
    }
}
impl Default for Env33C {
fn default() -> Self {
Self::new()
}
}
impl CertRule for Env33C {
    /// Identifier under which violations of this rule are reported.
    fn rule_id(&self) -> &'static str {
        "ENV33-C"
    }

    /// One-line human-readable summary of the rule.
    fn description(&self) -> &'static str {
        "Do not call system()"
    }

    /// Severity attached to every violation this rule emits.
    fn severity(&self) -> Severity {
        Severity::High
    }

    /// Manifest category of this rule.
    fn category(&self) -> RuleCategory {
        RuleCategory::Rule
    }

    /// CERT identifier of the rule.
    fn cert_id(&self) -> &'static str {
        "ENV33-C"
    }

    /// Caches the project-wide data this rule consults during `check`.
    ///
    /// Copies the macro aliases and function summaries out of `context`, and
    /// inverts the caller -> callees call graph into a callee -> callers map
    /// so that the callers of a function can be looked up by its name.
    fn set_project_context(&self, context: &ProjectContext) {
        let mut reverse_graph: HashMap<String, HashSet<String>> = HashMap::new();
        for (from, targets) in &context.call_graph {
            for to in targets {
                reverse_graph
                    .entry(to.clone())
                    .or_default()
                    .insert(from.clone());
            }
        }

        self.project_aliases.replace(context.macro_aliases.clone());
        self.function_summaries
            .replace(context.function_summaries.clone());
        self.callers.replace(reverse_graph);
    }

    /// Runs the rule over the tree rooted at `node` and returns every
    /// violation found.
    ///
    /// The alias table used for name resolution is rebuilt on each call: it
    /// starts from the project aliases and is extended with the aliases
    /// collected from `node` itself.
    fn check(&self, node: &Node, source: &str) -> Vec<RuleViolation> {
        let mut merged = self.project_aliases.borrow().clone();
        merged.extend(const_eval::collect_macro_aliases(node, source));
        self.current_aliases.replace(merged);

        let mut found = Vec::new();
        self.check_node(node, source, &mut found);
        found
    }
}
/// Names of functions treated as taint sources.
///
/// A function whose body calls any of these names is never considered a safe
/// context for a command-executing call.
const TAINTED_SOURCE_FUNCTIONS: &[&str] = &[
    // recv family
    "recv",
    "recvfrom",
    "recvmsg",
    // line-oriented input
    "fgets",
    "gets",
    "gets_s",
    // scanf family
    "scanf",
    "fscanf",
    "sscanf",
    "wscanf",
    "fwscanf",
    // block and character input
    "fread",
    "fgetc",
    "fgetwc",
    "getchar",
    "getwchar",
    "getc",
    "getwc",
    "fgetws",
    // environment lookups
    "getenv",
    "_wgetenv",
    // read calls
    "read",
    "pread",
];
impl Env33C {
/// Maps `name` through the alias table of the current `check` call.
///
/// Returns the alias target when `name` is a known alias, otherwise `name`
/// itself. Only a single level of aliasing is followed.
fn resolve_name(&self, name: &str) -> String {
    self.current_aliases
        .borrow()
        .get(name)
        .cloned()
        .unwrap_or_else(|| name.to_string())
}
/// Recursively visits `node` and its descendants, recording a violation for
/// every call to a dangerous function that is not judged safe.
///
/// The callee text is resolved through the alias table first, so a macro that
/// expands to e.g. `system` is reported as well; in that case the message
/// names both the macro and its target.
fn check_node(&self, node: &Node, source: &str, violations: &mut Vec<RuleViolation>) {
    let callee = if node.kind() == "call_expression" {
        node.child_by_field_name("function")
    } else {
        None
    };

    if let Some(callee) = callee {
        let written = get_node_text(&callee, source);
        let target = self.resolve_name(written);

        if self.is_dangerous_function(&target) && !self.is_safe_command_call(node, source) {
            let advice = match target.as_str() {
                "system" => "Use the exec() family of functions (execl, execv, etc.) instead, which provide better control and security",
                "popen" | "_popen" => "Use pipe() and fork() with exec() family functions for better security",
                _ => "Use safer alternatives like the exec() family of functions",
            };
            let shown = if written != target {
                format!("{} (macro for {})", written, target)
            } else {
                written.to_string()
            };
            let position = node.start_position();

            violations.push(RuleViolation {
                rule_id: self.rule_id().to_string(),
                message: format!(
                    "Call to '{}' is prohibited. This function invokes a command \
                     processor which can lead to command injection vulnerabilities. \
                     The function executes arbitrary shell commands and is inherently \
                     dangerous when user input is involved.",
                    shown
                ),
                severity: self.severity(),
                // tree-sitter positions are zero-based; reports are one-based.
                line: position.row + 1,
                column: position.column + 1,
                file_path: String::new(),
                suggestion: Some(advice.to_string()),
                requires_manual_review: None,
            });
        }
    }

    // Always descend: calls may be nested inside the arguments of other calls.
    for child in (0..node.child_count()).filter_map(|i| node.child(i)) {
        self.check_node(&child, source, violations);
    }
}
/// Decides whether a call to a dangerous function may be left unreported.
///
/// Returns `false` (report the call) when:
/// * the first argument is a string literal,
/// * the call is not inside a function definition,
/// * the enclosing function calls a tainted-source function, or
/// * the enclosing function has pointer/array parameters and its callers are
///   not all clean (see `callers_are_all_clean`).
///
/// Returns `true` in every other case.
fn is_safe_command_call(&self, call_node: &Node, source: &str) -> bool {
    // Kind of the first child of the argument list that is not punctuation.
    let first_arg_kind = call_node
        .child_by_field_name("arguments")
        .and_then(|args| {
            (0..args.child_count())
                .filter_map(|i| args.child(i))
                .map(|child| child.kind())
                .find(|kind| !matches!(*kind, "(" | ")" | ","))
        });
    if first_arg_kind == Some("string_literal") {
        return false;
    }

    let Some(enclosing) = Self::find_containing_function(call_node) else {
        return false;
    };
    if self.function_has_tainted_source(&enclosing, source) {
        return false;
    }

    // Without pointer/array parameters nothing more is checked; with them the
    // verdict depends on the transitive callers.
    !Self::has_pointer_parameters(&enclosing, source)
        || self.callers_are_all_clean(&enclosing, source)
}
fn callers_are_all_clean(&self, func_node: &Node, source: &str) -> bool {
let Some(name) = cfg::get_function_name(func_node, source) else {
return false;
};
let callers = self.callers.borrow();
let Some(root_callers) = callers.get(name) else {
return false;
};
if root_callers.is_empty() {
return false;
}
let summaries = self.function_summaries.borrow();
let mut visited: HashSet<String> = HashSet::new();
let mut stack: Vec<String> = root_callers.iter().cloned().collect();
while let Some(current) = stack.pop() {
if !visited.insert(current.clone()) {
continue;
}
match summaries.get(¤t) {
Some(s) if !s.has_env03_taint_source && !s.returns_tainted => {}
_ => return false,
}
if let Some(next) = callers.get(¤t) {
for c in next {
if !visited.contains(c) {
stack.push(c.clone());
}
}
}
}
true
}
fn find_containing_function<'a>(node: &Node<'a>) -> Option<Node<'a>> {
let mut current = node.parent();
while let Some(n) = current {
if n.kind() == "function_definition" {
return Some(n);
}
current = n.parent();
}
None
}
/// Reports whether the function rooted at `func_node` declares a parameter
/// whose text contains `*` or `[`.
///
/// Returns `false` when the node has no `declarator` field.
fn has_pointer_parameters(func_node: &Node, source: &str) -> bool {
    match func_node.child_by_field_name("declarator") {
        Some(declarator) => Self::find_pointer_params_in_subtree(&declarator, source),
        None => false,
    }
}
fn find_pointer_params_in_subtree(node: &Node, source: &str) -> bool {
if node.kind() == "parameter_list" {
for i in 0..node.child_count() {
if let Some(param) = node.child(i) {
if param.kind() == "parameter_declaration" {
let param_text = get_node_text(¶m, source);
if param_text.trim() == "void" {
continue;
}
if param_text.contains('*') || param_text.contains('[') {
return true;
}
}
}
}
return false;
}
for i in 0..node.child_count() {
if let Some(child) = node.child(i) {
if Self::find_pointer_params_in_subtree(&child, source) {
return true;
}
}
}
false
}
/// Reports whether the subtree rooted at `func_node` contains a call to one of
/// the `TAINTED_SOURCE_FUNCTIONS`; delegates to `scan_for_tainted_calls`.
fn function_has_tainted_source(&self, func_node: &Node, source: &str) -> bool {
self.scan_for_tainted_calls(func_node, source)
}
/// Recursively searches `node` and its descendants for a call whose callee is
/// listed in `TAINTED_SOURCE_FUNCTIONS`.
///
/// The callee is matched both after alias resolution and by its literal text,
/// so a listed name is caught whether or not an alias is involved.
fn scan_for_tainted_calls(&self, node: &Node, source: &str) -> bool {
    let callee = if node.kind() == "call_expression" {
        node.child_by_field_name("function")
    } else {
        None
    };

    if let Some(callee) = callee {
        let written = get_node_text(&callee, source);
        let target = self.resolve_name(written);
        let is_source = TAINTED_SOURCE_FUNCTIONS.contains(&target.as_str())
            || TAINTED_SOURCE_FUNCTIONS.contains(&written);
        if is_source {
            return true;
        }
    }

    (0..node.child_count())
        .filter_map(|i| node.child(i))
        .any(|child| self.scan_for_tainted_calls(&child, source))
}
/// Reports whether `name` (already alias-resolved by the caller) is one of the
/// function names this rule flags.
fn is_dangerous_function(&self, name: &str) -> bool {
    const FLAGGED_NAMES: &[&str] = &[
        "system",
        "popen",
        "_popen",
        "_execl",
        "_execle",
        "_execlp",
        "_execv",
        "_execve",
        "_execvp",
        "_spawnl",
        "_spawnle",
        "_spawnlp",
        "_spawnv",
        "_spawnve",
        "_spawnvp",
    ];
    FLAGGED_NAMES.contains(&name)
}
}