use regex::Regex;
use std::sync::LazyLock;
/// Category of GCC-specific construct recorded in a [`Transformation`].
///
/// The enum is fieldless, so it also derives `Eq` and `Hash`; this lets
/// callers use a kind as a set or map key (for example to count rewrites
/// per category).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TransformKind {
    /// An `__attribute__((...))` specifier.
    Attribute,
    /// The `__extension__` marker.
    Extension,
    /// A `typeof(...)`, `__typeof__(...)` or `__typeof(...)` type expression.
    Typeof,
    /// A statement expression, `({ ... })`.
    StatementExpression,
    /// An inline assembly statement (`__asm__(...)`, `__asm volatile(...)`).
    Asm,
    /// The `__restrict` / `__restrict__` qualifier.
    Restrict,
    /// The `__inline` / `__inline__` specifier.
    Inline,
    /// The `__volatile` / `__volatile__` qualifier.
    Volatile,
    /// NOTE(review): not produced by the neutralizer in this file, which
    /// reports every `typeof` spelling as [`TransformKind::Typeof`].
    TypeofUnderscore,
    /// A compile-time builtin query; in this file that means
    /// `__builtin_offsetof(...)` and `__builtin_types_compatible_p(...)`.
    AlignofSizeof,
}
/// Record of a single rewrite performed while neutralizing a source text.
///
/// All offsets and lengths are in bytes.
///
/// NOTE(review): the neutralizer in this file takes the offsets from its
/// working buffer at the moment of the rewrite, i.e. after every earlier
/// rewrite has already been applied. They therefore only coincide with
/// positions in the pristine input for the first recorded transformation.
#[derive(Debug, Clone)]
pub struct Transformation {
/// Offset at which the rewritten construct started.
pub original_start: usize,
/// Length of the text that was removed or replaced.
pub original_length: usize,
/// Offset at which the replacement text starts; the neutralizer in this
/// file always sets it to the same value as `original_start`.
pub transformed_start: usize,
/// Length recorded for the replacement text; `0` when the construct was
/// deleted without a substitute.
pub transformed_length: usize,
/// Category of the construct that was rewritten.
pub kind: TransformKind,
/// Exact text that was removed from the working buffer.
pub original_text: String,
}
/// Result of neutralizing a source text: the rewritten code together with a
/// log of the rewrites that produced it.
#[derive(Debug)]
pub struct NeutralizedSource {
/// Source text after all rewrites have been applied.
pub code: String,
/// Rewrites in the order in which they were performed.
pub transformations: Vec<Transformation>,
}
/// Rewrites GCC-specific C extensions into plain constructs.
///
/// The type carries no state; it derives `Debug` and `Clone` so that it can
/// be embedded in structs that derive those traits themselves.
#[derive(Debug, Clone)]
pub struct GccNeutralizer {}
// Patterns locating the GCC extensions that the neutralizer rewrites.
//
// Every keyword pattern starts with `\b` so that it can only match a whole
// token. Without the anchor, `typeof\s*\(` also matched the tail of
// `__typeof(`, turning `__typeof(x) y;` into `__int y;`, and any pattern could
// fire in the middle of a longer identifier such as `my_typeof(...)`.
//
// The patterns are literals, so a compilation failure is a programming error;
// hence the `unwrap()`.

/// Opening of an attribute specifier, up to and including the `((`.
static RE_ATTRIBUTE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b__attribute__\s*\(\(").unwrap());
/// The `__extension__` marker together with any whitespace after it.
static RE_EXTENSION: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b__extension__\s*").unwrap());
/// Plain `typeof`, up to and including the opening parenthesis.
static RE_TYPEOF: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\btypeof\s*\(").unwrap());
/// `__typeof__`, up to and including the opening parenthesis.
static RE_TYPEOF_UNDERSCORE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b__typeof__\s*\(").unwrap());
/// `__typeof`, up to and including the opening parenthesis.
static RE_TYPEOF_SINGLE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b__typeof\s*\(").unwrap());
/// `__asm__` with an optional `volatile`, up to and including the opening
/// parenthesis.
static RE_ASM: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b__asm__\s*(?:volatile\s*)?\(").unwrap());
/// `__asm volatile`, up to and including the opening parenthesis.
static RE_ASM_VOLATILE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b__asm\s+volatile\s*\(").unwrap());
/// `__restrict` followed by at least one whitespace character.
static RE_RESTRICT: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__restrict\s+").unwrap());
/// `__restrict__` followed by at least one whitespace character.
static RE_RESTRICT_UNDERSCORE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b__restrict__\s+").unwrap());
/// `__inline__` followed by at least one whitespace character.
static RE_INLINE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\b__inline__\s+").unwrap());
/// `__inline` followed by at least one whitespace character.
static RE_INLINE_SINGLE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b__inline\s+").unwrap());
/// `__volatile__` followed by at least one whitespace character.
static RE_VOLATILE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b__volatile__\s+").unwrap());
/// `__volatile` followed by at least one whitespace character.
static RE_VOLATILE_SINGLE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b__volatile\s+").unwrap());
/// Opening `({` of a statement expression.
static RE_STATEMENT_EXPR: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\(\{").unwrap());
/// `__builtin_offsetof`, up to and including the opening parenthesis.
static RE_BUILTIN_OFFSETOF: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b__builtin_offsetof\s*\(").unwrap());
/// `__builtin_types_compatible_p`, up to and including the opening parenthesis.
static RE_BUILTIN_TYPES_COMPATIBLE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"\b__builtin_types_compatible_p\s*\(").unwrap());
impl Default for GccNeutralizer {
fn default() -> Self {
Self::new()
}
}
impl GccNeutralizer {
/// Creates a neutralizer. The type has no fields, so there is nothing to
/// configure.
pub fn new() -> Self {
Self {}
}
pub fn neutralize(&self, source: &str) -> NeutralizedSource {
let mut code = source.to_string();
let mut transformations = Vec::new();
while let Some(m) = RE_ATTRIBUTE.find(&code) {
let start = m.start();
if let Some((end, _original)) = Self::find_double_paren_end(&code, start + m.len()) {
let original_text = code[start..end].to_string();
transformations.push(Transformation {
original_start: start,
original_length: end - start,
transformed_start: start,
transformed_length: 0,
kind: TransformKind::Attribute,
original_text,
});
code = format!("{}{}", &code[..start], &code[end..]);
} else {
break;
}
}
while let Some(m) = RE_EXTENSION.find(&code) {
let start = m.start();
let end = m.end();
transformations.push(Transformation {
original_start: start,
original_length: end - start,
transformed_start: start,
transformed_length: 0,
kind: TransformKind::Extension,
original_text: code[start..end].to_string(),
});
code = format!("{}{}", &code[..start], &code[end..]);
}
for regex in [&*RE_TYPEOF, &*RE_TYPEOF_UNDERSCORE, &*RE_TYPEOF_SINGLE] {
while let Some(m) = regex.find(&code) {
let start = m.start();
if let Some((end, _)) = Self::find_matching_paren(&code, m.end() - 1) {
let original_text = code[start..end].to_string();
transformations.push(Transformation {
original_start: start,
original_length: end - start,
transformed_start: start,
transformed_length: 3, kind: TransformKind::Typeof,
original_text,
});
code = format!("{}int{}", &code[..start], &code[end..]);
} else {
break;
}
}
}
for regex in [&*RE_ASM, &*RE_ASM_VOLATILE] {
while let Some(m) = regex.find(&code) {
let start = m.start();
if let Some((end, _)) = Self::find_matching_paren(&code, m.end() - 1) {
let stmt_end = code[end..].find(';').map(|i| end + i + 1).unwrap_or(end);
let original_text = code[start..stmt_end].to_string();
transformations.push(Transformation {
original_start: start,
original_length: stmt_end - start,
transformed_start: start,
transformed_length: 4, kind: TransformKind::Asm,
original_text,
});
code = format!("{}0{}", &code[..start], &code[stmt_end..]);
} else {
break;
}
}
}
for regex in [&*RE_RESTRICT, &*RE_RESTRICT_UNDERSCORE] {
while let Some(m) = regex.find(&code) {
let start = m.start();
let end = m.end();
transformations.push(Transformation {
original_start: start,
original_length: end - start,
transformed_start: start,
transformed_length: 0,
kind: TransformKind::Restrict,
original_text: code[start..end].to_string(),
});
code = format!("{}{}", &code[..start], &code[end..]);
}
}
for regex in [&*RE_INLINE, &*RE_INLINE_SINGLE] {
while let Some(m) = regex.find(&code) {
let start = m.start();
let end = m.end();
transformations.push(Transformation {
original_start: start,
original_length: end - start,
transformed_start: start,
transformed_length: 0,
kind: TransformKind::Inline,
original_text: code[start..end].to_string(),
});
code = format!("{}{}", &code[..start], &code[end..]);
}
}
for regex in [&*RE_VOLATILE, &*RE_VOLATILE_SINGLE] {
while let Some(m) = regex.find(&code) {
let start = m.start();
let end = m.end();
transformations.push(Transformation {
original_start: start,
original_length: end - start,
transformed_start: start,
transformed_length: 0,
kind: TransformKind::Volatile,
original_text: code[start..end].to_string(),
});
code = format!("{}{}", &code[..start], &code[end..]);
}
}
while let Some(m) = RE_STATEMENT_EXPR.find(&code) {
let start = m.start();
if let Some((end, _)) = Self::find_statement_expr_end(&code, start) {
let original_text = code[start..end].to_string();
transformations.push(Transformation {
original_start: start,
original_length: end - start,
transformed_start: start,
transformed_length: 3, kind: TransformKind::StatementExpression,
original_text,
});
code = format!("{}(0){}", &code[..start], &code[end..]);
} else {
break;
}
}
while let Some(m) = RE_BUILTIN_OFFSETOF.find(&code) {
let start = m.start();
if let Some((end, _)) = Self::find_matching_paren(&code, m.end() - 1) {
let original_text = code[start..end].to_string();
transformations.push(Transformation {
original_start: start,
original_length: end - start,
transformed_start: start,
transformed_length: 1, kind: TransformKind::AlignofSizeof,
original_text,
});
code = format!("{}0{}", &code[..start], &code[end..]);
} else {
break;
}
}
while let Some(m) = RE_BUILTIN_TYPES_COMPATIBLE.find(&code) {
let start = m.start();
if let Some((end, _)) = Self::find_matching_paren(&code, m.end() - 1) {
let original_text = code[start..end].to_string();
transformations.push(Transformation {
original_start: start,
original_length: end - start,
transformed_start: start,
transformed_length: 1, kind: TransformKind::AlignofSizeof,
original_text,
});
code = format!("{}0{}", &code[..start], &code[end..]);
} else {
break;
}
}
NeutralizedSource {
code,
transformations,
}
}
/// Finds the end of a construct that was opened with `((`.
///
/// `start` is the byte offset right behind the opening `((`. Parentheses
/// inside string and character literals are ignored, and a backslash inside a
/// literal escapes the byte after it.
///
/// Returns the offset just past the parenthesis that closes the outer pair,
/// together with a copy of `code[start..end]`, or `None` when the text ends
/// before both levels are closed.
fn find_double_paren_end(code: &str, start: usize) -> Option<(usize, String)> {
    // Index of the quote that closes the literal opened at `open`; at or past
    // `bytes.len()` when the literal is never closed.
    fn closing_quote(bytes: &[u8], open: usize) -> usize {
        let quote = bytes[open];
        let mut pos = open + 1;
        while pos < bytes.len() && bytes[pos] != quote {
            pos += if bytes[pos] == b'\\' { 2 } else { 1 };
        }
        pos
    }

    let bytes = code.as_bytes();
    // Both parentheses of the opening `((` are still waiting to be closed.
    let mut unclosed = 2;
    let mut cursor = start;
    while unclosed > 0 {
        // Running off the end with open parentheses means "no match".
        match *bytes.get(cursor)? {
            b'(' => unclosed += 1,
            b')' => unclosed -= 1,
            b'"' | b'\'' => cursor = closing_quote(bytes, cursor),
            _ => {}
        }
        cursor += 1;
    }
    Some((cursor, code[start..cursor].to_string()))
}
/// Finds the parenthesis that closes the one at byte offset `start`.
///
/// Parentheses inside string and character literals are ignored, and a
/// backslash inside a literal escapes the byte after it.
///
/// Returns the offset just past the closing parenthesis together with a copy
/// of `code[start..end]` (both parentheses included). Returns `None` when
/// `start` is out of range, does not point at `(`, or the parenthesis is
/// never closed.
fn find_matching_paren(code: &str, start: usize) -> Option<(usize, String)> {
    // Index of the quote that closes the literal opened at `open`; at or past
    // `bytes.len()` when the literal is never closed.
    fn closing_quote(bytes: &[u8], open: usize) -> usize {
        let quote = bytes[open];
        let mut pos = open + 1;
        while pos < bytes.len() && bytes[pos] != quote {
            pos += if bytes[pos] == b'\\' { 2 } else { 1 };
        }
        pos
    }

    let bytes = code.as_bytes();
    if bytes.get(start) != Some(&b'(') {
        return None;
    }

    let mut unclosed = 1;
    let mut cursor = start + 1;
    while unclosed > 0 {
        // Running off the end with an open parenthesis means "no match".
        match *bytes.get(cursor)? {
            b'(' => unclosed += 1,
            b')' => unclosed -= 1,
            b'"' | b'\'' => cursor = closing_quote(bytes, cursor),
            _ => {}
        }
        cursor += 1;
    }
    Some((cursor, code[start..cursor].to_string()))
}
fn find_statement_expr_end(code: &str, start: usize) -> Option<(usize, String)> {
let bytes = code.as_bytes();
if start + 1 >= bytes.len() || bytes[start] != b'(' || bytes[start + 1] != b'{' {
return None;
}
let mut paren_depth = 1;
let mut brace_depth = 1;
let mut i = start + 2;
while i < bytes.len() && (paren_depth > 0 || brace_depth > 0) {
match bytes[i] {
b'(' => paren_depth += 1,
b')' => paren_depth -= 1,
b'{' => brace_depth += 1,
b'}' => brace_depth -= 1,
b'"' => {
i += 1;
while i < bytes.len() && bytes[i] != b'"' {
if bytes[i] == b'\\' {
i += 1;
}
i += 1;
}
}
b'\'' => {
i += 1;
while i < bytes.len() && bytes[i] != b'\'' {
if bytes[i] == b'\\' {
i += 1;
}
i += 1;
}
}
_ => {}
}
i += 1;
}
if paren_depth == 0 && brace_depth == 0 {
Some((i, code[start..i].to_string()))
} else {
None
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a freshly constructed neutralizer over `source`.
    fn neutralized(source: &str) -> NeutralizedSource {
        GccNeutralizer::new().neutralize(source)
    }

    #[test]
    fn test_neutralize_attribute() {
        let output = neutralized("void __attribute__((packed)) foo(void) {}");
        assert!(!output.code.contains("__attribute__"));
        assert!(output.code.contains("void foo(void) {}"));
        assert!(!output.transformations.is_empty());
        assert_eq!(output.transformations[0].kind, TransformKind::Attribute);
    }

    #[test]
    fn test_neutralize_attribute_nested() {
        // The nested `aligned(16)` must not end the attribute early.
        let output = neutralized("void __attribute__((unused, aligned(16))) bar(void) {}");
        assert!(!output.code.contains("__attribute__"));
        assert!(output.code.contains("void bar(void) {}"));
    }

    #[test]
    fn test_neutralize_extension() {
        let output = neutralized("__extension__ union { int x; float y; };");
        assert!(!output.code.contains("__extension__"));
        assert!(output.code.contains("union { int x; float y; };"));
    }

    #[test]
    fn test_neutralize_typeof() {
        let output = neutralized("typeof(foo) bar;");
        assert!(!output.code.contains("typeof"));
        assert!(output.code.contains("int bar;"));
    }

    #[test]
    fn test_neutralize_typeof_underscore() {
        let output = neutralized("__typeof__(x) y;");
        assert!(!output.code.contains("__typeof__"));
        assert!(output.code.contains("int y;"));
    }

    #[test]
    fn test_neutralize_asm() {
        // The trailing `;` is consumed together with the asm statement.
        let output = neutralized("void foo(void) { __asm__ volatile(\"nop\"); }");
        assert!(!output.code.contains("__asm__"));
        assert!(output.code.contains("{ 0 }"));
    }

    #[test]
    fn test_neutralize_restrict() {
        let output = neutralized("void foo(int * __restrict p) {}");
        assert!(!output.code.contains("__restrict"));
        assert!(output.code.contains("int * p"));
    }

    #[test]
    fn test_neutralize_inline() {
        let output = neutralized("__inline__ void foo(void) {}");
        assert!(!output.code.contains("__inline__"));
        assert!(output.code.contains("void foo(void)"));
    }

    #[test]
    fn test_neutralize_statement_expression() {
        let output = neutralized("int x = ({ int y = 5; y + 1; });");
        assert!(!output.code.contains("({"));
        assert!(output.code.contains("int x = (0);"));
    }

    #[test]
    fn test_neutralize_builtin_offsetof() {
        let output = neutralized("int off = __builtin_offsetof(struct foo, bar);");
        assert!(!output.code.contains("__builtin_offsetof"));
        assert!(output.code.contains("int off = 0;"));
    }

    #[test]
    fn test_neutralize_multiple() {
        // Several different extensions within one declaration.
        let source = r#"
__extension__ struct {
__attribute__((packed)) int x;
} __attribute__((aligned(16)));
"#;
        let output = neutralized(source);
        assert!(!output.code.contains("__extension__"));
        assert!(!output.code.contains("__attribute__"));
    }

    #[test]
    fn test_transformation_tracking() {
        let output = neutralized("__attribute__((unused)) int x;");
        assert!(!output.transformations.is_empty());
        let first = &output.transformations[0];
        assert_eq!(first.kind, TransformKind::Attribute);
        assert!(first.original_text.contains("__attribute__"));
    }

    #[test]
    fn test_no_false_positives() {
        // Identifiers that merely resemble the keywords stay untouched.
        let output = neutralized("int attribute_count; int typeof_var;");
        assert!(output.code.contains("attribute_count"));
        assert!(output.code.contains("typeof_var"));
    }

    #[test]
    fn test_string_literal_handling() {
        let output = neutralized("char *s = \"test\";");
        assert!(output.code.contains("\"test\""));
    }
}